[llvm] [AArch64] Legalize fp128 scalars as libcalls for G_FCMP (PR #98452)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 02:26:02 PDT 2024
https://github.com/Him188 created https://github.com/llvm/llvm-project/pull/98452
- Generate libcall for supported predicates.
- Generate unsupported predicates as combinations of supported predicates.
GISel now generates the same code as SDAG; however, note the difference in the `one` case.
>From a294242256e93cbb863d59503aede635958aca77 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Wed, 3 Jul 2024 17:45:07 +0100
Subject: [PATCH] [AArch64] Legalize fp128 scalars for G_FCMP
- Generate libcall for supported predicates.
- Generate unsupported predicates as combinations of supported predicates.
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 3 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 153 ++++-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 5 +-
llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 70 ++-
llvm/test/CodeGen/AArch64/fcmp-fp128.ll | 560 ++++++++++++++++++
llvm/test/CodeGen/AArch64/fcmp.ll | 353 ++++++-----
6 files changed, 977 insertions(+), 167 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/fcmp-fp128.ll
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 284f434fbb9b0..b8a0d94eeda2e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -280,6 +280,9 @@ class LegalizerHelper {
LegalizeResult createResetStateLibcall(MachineIRBuilder &MIRBuilder,
MachineInstr &MI,
LostDebugLocObserver &LocObserver);
+ LegalizeResult createFCMPLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI,
+ LostDebugLocObserver &LocObserver);
MachineInstrBuilder
getNeutralElementForVecReduce(unsigned Opcode, MachineIRBuilder &MIRBuilder,
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 86de1f3be9047..04a266daa9855 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -723,8 +723,7 @@ static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
if (MemType.isVector())
return RTLIB::UNKNOWN_LIBCALL;
-#define LCALLS(A, B) \
- { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A) \
LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
switch (Opc) {
@@ -980,6 +979,150 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
LocObserver, nullptr);
}
+/// Returns the corresponding libcall for the given Pred and
+/// the ICMP predicate that should be generated to compare with #0
+/// after the libcall.
+static std::pair<RTLIB::Libcall, CmpInst::Predicate>
+getFCMPLibcallDesc(const CmpInst::Predicate Pred) {
+
+ switch (Pred) {
+ case CmpInst::FCMP_OEQ:
+ return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ};
+ case CmpInst::FCMP_UNE:
+ return {RTLIB::UNE_F128, CmpInst::ICMP_NE};
+ case CmpInst::FCMP_OGE:
+ return {RTLIB::OGE_F128, CmpInst::ICMP_SGE};
+ case CmpInst::FCMP_OLT:
+ return {RTLIB::OLT_F128, CmpInst::ICMP_SLT};
+ case CmpInst::FCMP_OLE:
+ return {RTLIB::OLE_F128, CmpInst::ICMP_SLE};
+ case CmpInst::FCMP_OGT:
+ return {RTLIB::OGT_F128, CmpInst::ICMP_SGT};
+ case CmpInst::FCMP_UNO:
+ return {RTLIB::UO_F128, CmpInst::ICMP_NE};
+ default:
+ return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) {
+ auto &MF = MIRBuilder.getMF();
+ auto &Ctx = MF.getFunction().getContext();
+
+ LLT OpLLT = MRI.getType(MI.getOperand(2).getReg());
+ if (OpLLT != LLT::scalar(128) ||
+ OpLLT != MRI.getType(MI.getOperand(3).getReg()))
+ return UnableToLegalize;
+
+ Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
+
+ // Libcalls always return an i32
+ constexpr LLT I32LLT = LLT::scalar(32);
+ constexpr LLT PredTy = LLT::scalar(1);
+
+ const Register DstReg = MI.getOperand(0).getReg();
+ const Register Op1 = MI.getOperand(2).getReg();
+ const Register Op2 = MI.getOperand(3).getReg();
+ const auto Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ // Generates a libcall followed by ICMP
+ const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
+ const CmpInst::Predicate ICmpPred) -> Register {
+ Register Temp = MRI.createGenericVirtualRegister(I32LLT);
+ // Generate libcall, storing result into Temp
+ const auto Status =
+ createLibcall(MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
+ {{Op1, OpType, 0}, {Op2, OpType, 1}}, LocObserver, &MI);
+ if (!Status)
+ return MCRegister::NoRegister;
+
+ // FCMP libcall always returns an i32, we need to compare it with #0 to get
+ // the final result.
+ const Register Res = MRI.createGenericVirtualRegister(PredTy);
+ MIRBuilder.buildICmp(ICmpPred, Res, Temp,
+ MIRBuilder.buildConstant(I32LLT, 0));
+ return Res;
+ };
+
+ // Simple case if we have a direct mapping from predicate to libcall
+ if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Pred);
+ Libcall != RTLIB::UNKNOWN_LIBCALL &&
+ ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
+ if (const auto Res = BuildLibcall(Libcall, ICmpPred)) {
+ MIRBuilder.buildCopy(DstReg, Res);
+ return Legalized;
+ }
+ return UnableToLegalize;
+ }
+
+ // No direct mapping found, should be generated as combination of libcalls.
+
+ switch (Pred) {
+ case CmpInst::FCMP_UEQ: {
+ // FCMP_UEQ: unordered or equal
+ // Convert into (FCMP_OEQ || FCMP_UNO).
+
+ const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
+ const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
+
+ const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
+ const auto Uno = BuildLibcall(UnoLibcall, UnoPred);
+
+ MIRBuilder.buildCopy(DstReg, MIRBuilder.buildOr(PredTy, Oeq, Uno));
+ break;
+ }
+ case CmpInst::FCMP_ONE: {
+ // FCMP_ONE: ordered and operands are unequal
+ // Convert into (!FCMP_OEQ && !FCMP_UNO).
+
+ // We invert the predicate instead of generating a NOT
+ // to save one instruction.
+ // On AArch64 isel can even select two cmp into a single ccmp.
+ const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
+ const auto NotOeq =
+ BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred));
+
+ const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
+ const auto NotUno =
+ BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred));
+
+ if (NotOeq && NotUno)
+ MIRBuilder.buildCopy(DstReg, MIRBuilder.buildAnd(PredTy, NotOeq, NotUno));
+ else
+ return UnableToLegalize;
+
+ break;
+ }
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_ORD: {
+ // Convert into: !(inverse(Pred))
+ // E.g. FCMP_ULT becomes !FCMP_OGE
+ // This is equivalent to the following, but saves some instructions.
+ // MIRBuilder.buildNot(
+ // PredTy,
+ // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
+ // Op1, Op2));
+ const auto [InversedLibcall, InversedPred] =
+ getFCMPLibcallDesc(CmpInst::getInversePredicate(Pred));
+ MIRBuilder.buildCopy(
+ DstReg, BuildLibcall(InversedLibcall,
+ CmpInst::getInversePredicate(InversedPred)));
+ break;
+ }
+ default:
+ return UnableToLegalize;
+ }
+
+ return Legalized;
+}
+
// The function is used to legalize operations that set default environment
// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
// On most targets supported in glibc FE_DFL_MODE is defined as
@@ -1120,6 +1263,12 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
+ case TargetOpcode::G_FCMP: {
+ LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI: {
// FIXME: Support other types
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c6eb4d2b3ec78..511e47354cead 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -560,7 +560,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.widenScalarOrEltToNextPow2(1)
.clampScalar(0, s32, s32)
- .clampScalarOrElt(1, MinFPScalar, s64)
+ .clampScalarOrElt(1, MinFPScalar, s128)
.minScalarEltSameAsIf(
[=](const LegalityQuery &Query) {
const LLT &Ty = Query.Types[0];
@@ -572,7 +572,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(1, v4s16, v8s16)
.clampNumElements(1, v2s32, v4s32)
.clampMaxNumElements(1, s64, 2)
- .moreElementsToNextPow2(1);
+ .moreElementsToNextPow2(1)
+ .libcallFor({{s32, s128}});
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index b6702bba1598c..e951c80e30845 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,GISEL
target triple = "arm64-apple-ios"
define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
@@ -950,29 +950,51 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32
; Also verify that we don't try to generate f128 FCCMPs, using RT calls instead.
define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: f128_select_and_olt_oge:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
-; CHECK-NEXT: mov x19, x1
-; CHECK-NEXT: mov x20, x0
-; CHECK-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill
-; CHECK-NEXT: bl ___lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w21, lt
-; CHECK-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload
-; CHECK-NEXT: bl ___getf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w8, ge
-; CHECK-NEXT: tst w8, w21
-; CHECK-NEXT: csel w0, w20, w19, ne
-; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
-; CHECK-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #80
-; CHECK-NEXT: ret
+; SDISEL-LABEL: f128_select_and_olt_oge:
+; SDISEL: ; %bb.0:
+; SDISEL-NEXT: sub sp, sp, #80
+; SDISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; SDISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; SDISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; SDISEL-NEXT: mov x19, x1
+; SDISEL-NEXT: mov x20, x0
+; SDISEL-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill
+; SDISEL-NEXT: bl ___lttf2
+; SDISEL-NEXT: cmp w0, #0
+; SDISEL-NEXT: cset w21, lt
+; SDISEL-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload
+; SDISEL-NEXT: bl ___getf2
+; SDISEL-NEXT: cmp w0, #0
+; SDISEL-NEXT: cset w8, ge
+; SDISEL-NEXT: tst w8, w21
+; SDISEL-NEXT: csel w0, w20, w19, ne
+; SDISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; SDISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; SDISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; SDISEL-NEXT: add sp, sp, #80
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: f128_select_and_olt_oge:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #80
+; GISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; GISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; GISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; GISEL-NEXT: stp q3, q2, [sp] ; 32-byte Folded Spill
+; GISEL-NEXT: mov x19, x0
+; GISEL-NEXT: mov x20, x1
+; GISEL-NEXT: bl ___lttf2
+; GISEL-NEXT: mov x21, x0
+; GISEL-NEXT: ldp q1, q0, [sp] ; 32-byte Folded Reload
+; GISEL-NEXT: bl ___getf2
+; GISEL-NEXT: cmp w21, #0
+; GISEL-NEXT: ccmp w0, #0, #8, lt
+; GISEL-NEXT: csel w0, w19, w20, ge
+; GISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; GISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; GISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; GISEL-NEXT: add sp, sp, #80
+; GISEL-NEXT: ret
%c0 = fcmp olt fp128 %v0, %v1
%c1 = fcmp oge fp128 %v2, %v3
%cr = and i1 %c1, %c0
diff --git a/llvm/test/CodeGen/AArch64/fcmp-fp128.ll b/llvm/test/CodeGen/AArch64/fcmp-fp128.ll
new file mode 100644
index 0000000000000..503cb8c533bab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fcmp-fp128.ll
@@ -0,0 +1,560 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK-GI
+
+; Checks generated libcalls for fp128 types
+
+define double @oeq(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: oeq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __eqtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, eq
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: oeq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __eqtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, eq
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp oeq fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ogt(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ogt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __gttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, gt
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ogt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, gt
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ogt fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @olt(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: olt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, lt
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: olt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, lt
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp olt fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ole(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ole:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __letf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, le
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ole:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __letf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, le
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ole fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @one(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: one:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __eqtf2
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: cset w19, ne
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: cset w8, eq
+; CHECK-SD-NEXT: tst w8, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, ne
+; CHECK-SD-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: one:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __eqtf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov w19, w0
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ccmp w19, #0, #4, eq
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, ne
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp one fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ord(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ord:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, eq
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ord:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, eq
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ord fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @uno(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: uno:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, ne
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uno:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, ne
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp uno fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ueq(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ueq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __eqtf2
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov w19, w0
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ccmp w19, #0, #4, eq
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, eq
+; CHECK-SD-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ueq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __eqtf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov w19, w0
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ccmp w19, #0, #4, eq
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, eq
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ueq fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ugt(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __letf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, gt
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __letf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, gt
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ugt fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @uge(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, ge
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, ge
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp uge fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ult(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __getf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, lt
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, lt
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ult fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @ule(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __gttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, le
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, le
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp ule fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
+define double @une(fp128 %a, fp128 %b, double %d, double %e) {
+; CHECK-SD-LABEL: une:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __netf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, ne
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: une:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __netf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, ne
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fcmp une fp128 %a, %b
+ %s = select i1 %c, double %d, double %e
+ ret double %s
+}
+
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index a76d0b36fa1aa..6fe96611167da 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -4,37 +4,51 @@
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-
-; CHECK-GI: warning: Instruction selection used fallback path for f128_fp128
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for f128_i128
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for f128_double
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for f128_float
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for f128_i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for f128_half
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2f128_fp128
+; CHECK-GI: warning: Instruction selection used fallback path for v2f128_fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f128_fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2f128_double
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f128_double
-
define fp128 @f128_fp128(fp128 %a, fp128 %b, fp128 %d, fp128 %e) {
-; CHECK-LABEL: f128_fp128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q2, q3, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.ge .LBB0_2
-; CHECK-NEXT: // %bb.1: // %entry
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: .LBB0_2: // %entry
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: f128_fp128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q2, q3, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: b.ge .LBB0_2
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: .LBB0_2: // %entry
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: f128_fp128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q2, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov d0, v2.d[1]
+; CHECK-GI-NEXT: mov d1, v3.d[1]
+; CHECK-GI-NEXT: fcsel d2, d2, d3, lt
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fcsel d1, d0, d1, lt
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt fp128 %a, %b
%s = select i1 %c, fp128 %d, fp128 %e
@@ -42,37 +56,61 @@ entry:
}
define i128 @f128_i128(fp128 %a, fp128 %b, i128 %d, i128 %e) {
-; CHECK-LABEL: f128_i128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w30, -48
-; CHECK-NEXT: mov x19, x3
-; CHECK-NEXT: mov x20, x2
-; CHECK-NEXT: mov x21, x1
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: csel x20, x22, x20, lt
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: mov x0, x20
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel x1, x21, x19, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #80
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: f128_i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #80
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w20, -16
+; CHECK-SD-NEXT: .cfi_offset w21, -24
+; CHECK-SD-NEXT: .cfi_offset w22, -32
+; CHECK-SD-NEXT: .cfi_offset w30, -48
+; CHECK-SD-NEXT: mov x19, x3
+; CHECK-SD-NEXT: mov x20, x2
+; CHECK-SD-NEXT: mov x21, x1
+; CHECK-SD-NEXT: mov x22, x0
+; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csel x20, x22, x20, lt
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: mov w8, w0
+; CHECK-SD-NEXT: mov x0, x20
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: csel x1, x21, x19, lt
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #80
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: f128_i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w21, -24
+; CHECK-GI-NEXT: .cfi_offset w22, -32
+; CHECK-GI-NEXT: .cfi_offset w30, -48
+; CHECK-GI-NEXT: mov x19, x0
+; CHECK-GI-NEXT: mov x20, x1
+; CHECK-GI-NEXT: mov x21, x2
+; CHECK-GI-NEXT: mov x22, x3
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: csel x0, x19, x21, lt
+; CHECK-GI-NEXT: csel x1, x20, x22, lt
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt fp128 %a, %b
%s = select i1 %c, i128 %d, i128 %e
@@ -80,22 +118,39 @@ entry:
}
define double @f128_double(fp128 %a, fp128 %b, double %d, double %e) {
-; CHECK-LABEL: f128_double:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: fmov d8, d3
-; CHECK-NEXT: fmov d9, d2
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: fcsel d0, d9, d8, lt
-; CHECK-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: f128_double:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov d8, d3
+; CHECK-SD-NEXT: fmov d9, d2
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel d0, d9, d8, lt
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: f128_double:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov d8, d2
+; CHECK-GI-NEXT: fmov d9, d3
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d0, d8, d9, lt
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt fp128 %a, %b
%s = select i1 %c, double %d, double %e
@@ -103,22 +158,39 @@ entry:
}
define float @f128_float(fp128 %a, fp128 %b, float %d, float %e) {
-; CHECK-LABEL: f128_float:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: fmov s8, s3
-; CHECK-NEXT: fmov s9, s2
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: fcsel s0, s9, s8, lt
-; CHECK-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: f128_float:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -24
+; CHECK-SD-NEXT: .cfi_offset b9, -32
+; CHECK-SD-NEXT: fmov s8, s3
+; CHECK-SD-NEXT: fmov s9, s2
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcsel s0, s9, s8, lt
+; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: f128_float:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov s8, s2
+; CHECK-GI-NEXT: fmov s9, s3
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcsel s0, s8, s9, lt
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt fp128 %a, %b
%s = select i1 %c, float %d, float %e
@@ -126,22 +198,39 @@ entry:
}
define i32 @f128_i32(fp128 %a, fp128 %b, i32 %d, i32 %e) {
-; CHECK-LABEL: f128_i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: mov w19, w1
-; CHECK-NEXT: mov w20, w0
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: csel w0, w20, w19, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: f128_i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w20, -16
+; CHECK-SD-NEXT: .cfi_offset w30, -32
+; CHECK-SD-NEXT: mov w19, w1
+; CHECK-SD-NEXT: mov w20, w0
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csel w0, w20, w19, lt
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: f128_i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: mov w19, w0
+; CHECK-GI-NEXT: mov w20, w1
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: csel w0, w19, w20, lt
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt fp128 %a, %b
%s = select i1 %c, i32 %d, i32 %e
@@ -184,40 +273,26 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) {
; CHECK-SD-FP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NOFP16-LABEL: f128_half:
-; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-GI-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-GI-NOFP16-NEXT: .cfi_def_cfa_offset 32
-; CHECK-GI-NOFP16-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NOFP16-NEXT: .cfi_offset b8, -24
-; CHECK-GI-NOFP16-NEXT: .cfi_offset b9, -32
-; CHECK-GI-NOFP16-NEXT: fmov s8, s3
-; CHECK-GI-NOFP16-NEXT: fmov s9, s2
-; CHECK-GI-NOFP16-NEXT: bl __lttf2
-; CHECK-GI-NOFP16-NEXT: cmp w0, #0
-; CHECK-GI-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NOFP16-NEXT: fcsel s0, s9, s8, lt
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
-; CHECK-GI-NOFP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
-; CHECK-GI-NOFP16-NEXT: ret
-;
-; CHECK-GI-FP16-LABEL: f128_half:
-; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-GI-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-GI-FP16-NEXT: .cfi_def_cfa_offset 32
-; CHECK-GI-FP16-NEXT: .cfi_offset w30, -16
-; CHECK-GI-FP16-NEXT: .cfi_offset b8, -24
-; CHECK-GI-FP16-NEXT: .cfi_offset b9, -32
-; CHECK-GI-FP16-NEXT: fmov s8, s3
-; CHECK-GI-FP16-NEXT: fmov s9, s2
-; CHECK-GI-FP16-NEXT: bl __lttf2
-; CHECK-GI-FP16-NEXT: cmp w0, #0
-; CHECK-GI-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-FP16-NEXT: fcsel h0, h9, h8, lt
-; CHECK-GI-FP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
-; CHECK-GI-FP16-NEXT: ret
+; CHECK-GI-LABEL: f128_half:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: fmov s8, s2
+; CHECK-GI-NEXT: fmov s9, s3
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: fmov w8, s8
+; CHECK-GI-NEXT: fmov w9, s9
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $s0
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt fp128 %a, %b
%s = select i1 %c, half %d, half %e
More information about the llvm-commits
mailing list