[llvm] [GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT (PR #96297)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 09:11:18 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/96297
>From 9a2711e5b99db14ba06d22f32b069b9d3c050058 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 21 Jun 2024 12:00:35 +0100
Subject: [PATCH 1/3] [GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT
This is an implementation of the saturating fp to int conversions for
GlobalISel. On AArch64 the conversion instructions work this way, producing
saturating results. LegalizerHelper::lowerFPTOINT_SAT is ported from SDAG.
AArch64 has a lot of existing tests for fptosi_sat, covering a wide range of
types. I have tried to make most of them work all at once, but a few fall back
due to other missing features such as f128 handling for min/max.
---
llvm/docs/GlobalISel/GenericOpcode.rst | 5 +
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 2 +
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 +
.../CodeGen/GlobalISel/MachineIRBuilder.h | 10 +
llvm/include/llvm/Support/TargetOpcodes.def | 6 +
llvm/include/llvm/Target/GenericOpcodes.td | 12 +
.../Target/GlobalISel/SelectionDAGCompat.td | 2 +
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 8 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 147 +++
.../GISel/AArch64InstructionSelector.cpp | 6 +
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 49 +
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 +
.../GlobalISel/legalizer-info-validation.mir | 7 +
.../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 974 ++++++++++++------
.../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 712 ++++++++-----
llvm/test/TableGen/GlobalISelEmitter.td | 2 +-
16 files changed, 1387 insertions(+), 560 deletions(-)
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index d32aeff5a69bb1..4c9a807c598c02 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -504,6 +504,11 @@ G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
Convert between integer and floating point.
+G_FPTOSI_SAT, G_FPTOUI_SAT
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Saturating conversions from floating point to integer.
+
G_FABS
^^^^^^
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..b7c545e5136f43 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -823,6 +823,8 @@ class GCastOp : public GenericMachineInstr {
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_PTRTOINT:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index afd68250f5ca6e..5360850deeffd8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -398,6 +398,7 @@ class LegalizerHelper {
LegalizeResult lowerSITOFP(MachineInstr &MI);
LegalizeResult lowerFPTOUI(MachineInstr &MI);
LegalizeResult lowerFPTOSI(MachineInstr &MI);
+ LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
LegalizeResult lowerFPTRUNC(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 56a77b8596a18b..e02a967988a575 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2000,6 +2000,16 @@ class MachineIRBuilder {
return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
}
+ /// Build and insert \p Res = G_FPTOUI_SAT \p Src0
+ MachineInstrBuilder buildFPTOUI_SAT(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_FPTOUI_SAT, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_FPTOSI_SAT \p Src0
+ MachineInstrBuilder buildFPTOSI_SAT(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_FPTOSI_SAT, {Dst}, {Src0});
+ }
+
/// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
MachineInstrBuilder
buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e1883de0c93b4c..cd3d7c2e11f922 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -682,6 +682,12 @@ HANDLE_TARGET_OPCODE(G_SITOFP)
/// Generic unsigned-int to float conversion
HANDLE_TARGET_OPCODE(G_UITOFP)
+/// Generic saturating float to signed-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOSI_SAT)
+
+/// Generic saturating float to unsigned-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOUI_SAT)
+
/// Generic FP absolute value.
HANDLE_TARGET_OPCODE(G_FABS)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 36a0a087ba457c..f5570db373e4a7 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -769,6 +769,18 @@ def G_UITOFP : GenericInstruction {
let hasSideEffects = false;
}
+def G_FPTOSI_SAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
+def G_FPTOUI_SAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
def G_FABS : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 72d155b483cf2b..627c57429941ea 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -98,6 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
def : GINodeEquiv<G_SITOFP, sint_to_fp>;
def : GINodeEquiv<G_UITOFP, uint_to_fp>;
+def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat>;
+def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat>;
def : GINodeEquiv<G_FADD, fadd>;
def : GINodeEquiv<G_FSUB, fsub>;
def : GINodeEquiv<G_FMA, fma>;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b290d7fb4ce4a1..99267fab832b80 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2340,6 +2340,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineInstr::copyFlagsFromInstruction(CI));
return true;
}
+ case Intrinsic::fptosi_sat:
+ MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::fptoui_sat:
+ MIRBuilder.buildFPTOUI_SAT(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
case Intrinsic::memcpy_inline:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
case Intrinsic::memcpy:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3640b77ff4068c..44d28fff47a2b4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI_SAT:
+ case TargetOpcode::G_FPTOSI_SAT:
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ Register OldDst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(OldDst);
+ Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
+ Register NewDst;
+ MI.getOperand(0).setReg(ExtReg);
+ uint64_t ShortBits = Ty.getScalarSizeInBits();
+ uint64_t WideBits = WideTy.getScalarSizeInBits();
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
+ // z = i16 fptosi_sat(a)
+ // ->
+ // x = i32 fptosi_sat(a)
+ // y = smin(x, 32767)
+ // z = smax(y, -32768)
+ auto MaxVal = MIRBuilder.buildConstant(
+ WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
+ auto MinVal = MIRBuilder.buildConstant(
+ WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
+ Register MidReg =
+ MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
+ NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
+ } else {
+ // z = i16 fptoui_sat(a)
+ // ->
+ // x = i32 fptoui_sat(a)
+ // y = umin(x, 65535)
+ auto MaxVal = MIRBuilder.buildConstant(
+ WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
+ NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
+ }
+ MIRBuilder.buildTrunc(OldDst, NewDst);
+ } else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFPTOUI(MI);
case G_FPTOSI:
return lowerFPTOSI(MI);
+ case G_FPTOUI_SAT:
+ case G_FPTOSI_SAT:
+ return lowerFPTOINT_SAT(MI);
case G_FPTRUNC:
return lowerFPTRUNC(MI);
case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_UITOFP:
case G_FPTOSI:
case G_FPTOUI:
+ case G_FPTOSI_SAT:
+ case G_FPTOUI_SAT:
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
Observer.changingInstr(MI);
@@ -7285,6 +7335,103 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
+ unsigned SatWidth = DstTy.getScalarSizeInBits();
+
+ // Determine minimum and maximum integer values and their corresponding
+ // floating-point values.
+ APInt MinInt, MaxInt;
+ if (IsSigned) {
+ MinInt = APInt::getSignedMinValue(SatWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth);
+ } else {
+ MinInt = APInt::getMinValue(SatWidth);
+ MaxInt = APInt::getMaxValue(SatWidth);
+ }
+
+ const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+ APFloat MinFloat(Semantics);
+ APFloat MaxFloat(Semantics);
+
+ APFloat::opStatus MinStatus =
+ MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus =
+ MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+ !(MaxStatus & APFloat::opStatus::opInexact);
+
+ // If the integer bounds are exactly representable as floats and min/max are
+ // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
+ // of comparisons and selects.
+ bool MinMaxLegal = LI.isLegal({TargetOpcode::G_FMINNUM, SrcTy}) &&
+ LI.isLegal({TargetOpcode::G_FMAXNUM, SrcTy});
+ if (AreExactFloatBounds && MinMaxLegal) {
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+ auto Max = MIRBuilder.buildFMaxNum(
+ SrcTy, Src, MIRBuilder.buildFConstant(SrcTy, MinFloat));
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ auto Min = MIRBuilder.buildFMinNum(
+ SrcTy, Max, MIRBuilder.buildFConstant(SrcTy, MaxFloat),
+ MachineInstr::FmNoNans);
+ // Convert clamped value to integer. In the unsigned case we're done,
+ // because we mapped NaN to MinFloat, which will cast to zero.
+ if (!IsSigned) {
+ MIRBuilder.buildFPTOUI(Dst, Min);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Otherwise, select 0 if Src is NaN.
+ auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
+ auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+ DstTy.changeElementSize(1), Src, Src);
+ MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
+ FpToInt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Result of direct conversion. The assumption here is that the operation is
+ // non-trapping and it's fine to apply it to an out-of-range value if we
+ // select it away later.
+ auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
+ : MIRBuilder.buildFPTOUI(DstTy, Src);
+
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
+ // MinInt if Src is NaN.
+ auto ULT =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
+ MIRBuilder.buildFConstant(SrcTy, MinFloat));
+ auto Max = MIRBuilder.buildSelect(
+ DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
+ // If Src OGT MaxFloat, select MaxInt.
+ auto OGT =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
+ MIRBuilder.buildFConstant(SrcTy, MaxFloat));
+
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
+ // is already zero.
+ if (!IsSigned) {
+ MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
+ Max, MachineInstr::FmNoNans);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Otherwise, select 0 if Src is NaN.
+ auto Min = MIRBuilder.buildSelect(
+ DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
+ auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+ DstTy.changeElementSize(1), Src, Src);
+ MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 18361cf3685642..accadfd352f30e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2152,6 +2152,12 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
}
return false;
}
+ case TargetOpcode::G_FPTOSI_SAT:
+ I.setDesc(TII.get(TargetOpcode::G_FPTOSI));
+ return true;
+ case TargetOpcode::G_FPTOUI_SAT:
+ I.setDesc(TII.get(TargetOpcode::G_FPTOUI));
+ return true;
default:
return false;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index db5cd1d32d73d0..b0c1215bd5514a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -728,6 +728,55 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor(
{{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
+ getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
+ .legalFor({{s32, s32},
+ {s64, s32},
+ {s32, s64},
+ {s64, s64},
+ {v2s64, v2s64},
+ {v4s32, v4s32},
+ {v2s32, v2s32}})
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasFP16 &&
+ (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
+ Query.Types[1] == v8s16) &&
+ (Query.Types[0] == s32 || Query.Types[0] == s64 ||
+ Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
+ })
+ // Handle types larger than i64 by scalarizing/lowering.
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
+ // The range of a fp16 value fits into an i17, so we can lower the width
+ // to i64.
+ .narrowScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
+ },
+ changeTo(0, s64))
+ .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
+ .moreElementsToNextPow2(0)
+ .widenScalarToNextPow2(0, /*MinSize=*/32)
+ .minScalar(0, s32)
+ .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ unsigned ITySize = Query.Types[0].getScalarSizeInBits();
+ return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
+ ITySize > Query.Types[1].getScalarSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(1, 0))
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ unsigned FTySize = Query.Types[1].getScalarSizeInBits();
+ return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
+ Query.Types[0].getScalarSizeInBits() < FTySize;
+ },
+ LegalizeMutations::changeElementSizeTo(0, 1))
+ .widenScalarOrEltToNextPow2(0)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(0, s64, 2);
+
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalFor({{s32, s32},
{s64, s32},
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 23e135063147a1..8d63c36eb015f3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -535,6 +535,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_FCMP:
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
@@ -799,6 +801,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index b3b85090d11251..ddae45d96f6a78 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -538,6 +538,13 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_FPTOSI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_FPTOUI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index eeb1504d8dc77b..588cfca431efe8 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
;
; 32-bit float to signed integer
@@ -18,13 +20,23 @@ declare i100 @llvm.fptosi.sat.i100.f32(float)
declare i128 @llvm.fptosi.sat.i128.f32(float)
define i1 @test_signed_i1_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i1_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: ands w8, w8, w8, asr #31
-; CHECK-NEXT: csinv w8, w8, wzr, ge
-; CHECK-NEXT: and w0, w8, #0x1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i1_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs w8, s0
+; CHECK-SD-NEXT: ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT: and w0, w8, #0x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i1_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzs w8, s0
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: csel w8, w8, wzr, lt
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: csinv w8, w8, wzr, ge
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%x = call i1 @llvm.fptosi.sat.i1.f32(float %f)
ret i1 %x
}
@@ -99,16 +111,27 @@ define i32 @test_signed_i32_f32(float %f) nounwind {
}
define i50 @test_signed_i50_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i50_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x0, x8, x9, gt
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i50_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs x8, s0
+; CHECK-SD-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT: cmp x8, x9
+; CHECK-SD-NEXT: csel x8, x8, x9, lt
+; CHECK-SD-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT: cmp x8, x9
+; CHECK-SD-NEXT: csel x0, x8, x9, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i50_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzs x8, s0
+; CHECK-GI-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-NEXT: mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: csel x8, x8, x9, lt
+; CHECK-GI-NEXT: cmp x8, x10
+; CHECK-GI-NEXT: csel x0, x8, x10, gt
+; CHECK-GI-NEXT: ret
%x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
ret i50 %x
}
@@ -123,57 +146,105 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
}
define i100 @test_signed_i100_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s8, s0
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: movi v0.2s, #241, lsl #24
-; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT: mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: csel x8, x8, x1, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csel x8, x10, x8, gt
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x0, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i100_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s8, s0
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: movi v0.2s, #241, lsl #24
+; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: csel x8, x8, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csel x8, x10, x8, gt
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: csel x0, xzr, x9, vs
+; CHECK-SD-NEXT: csel x1, xzr, x8, vs
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i100_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov s8, s0
+; CHECK-GI-NEXT: bl __fixsfti
+; CHECK-GI-NEXT: movi v0.2s, #241, lsl #24
+; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT: mov x10, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcmp s8, s0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT: csel x9, xzr, x0, lt
+; CHECK-GI-NEXT: csel x8, x8, x1, lt
+; CHECK-GI-NEXT: fcmp s8, s0
+; CHECK-GI-NEXT: csinv x9, x9, xzr, le
+; CHECK-GI-NEXT: csel x8, x10, x8, gt
+; CHECK-GI-NEXT: fcmp s8, s8
+; CHECK-GI-NEXT: csel x0, xzr, x9, vs
+; CHECK-GI-NEXT: csel x1, xzr, x8, vs
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i100 @llvm.fptosi.sat.i100.f32(float %f)
ret i100 %x
}
define i128 @test_signed_i128_f32(float %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s8, s0
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: movi v0.2s, #255, lsl #24
-; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: csel x8, x8, x1, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csel x8, x10, x8, gt
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x0, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i128_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s8, s0
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: movi v0.2s, #255, lsl #24
+; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: csel x8, x8, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csel x8, x10, x8, gt
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: csel x0, xzr, x9, vs
+; CHECK-SD-NEXT: csel x1, xzr, x8, vs
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i128_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov s8, s0
+; CHECK-GI-NEXT: bl __fixsfti
+; CHECK-GI-NEXT: movi v0.2s, #255, lsl #24
+; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fcmp s8, s0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: csel x9, xzr, x0, lt
+; CHECK-GI-NEXT: csel x8, x8, x1, lt
+; CHECK-GI-NEXT: fcmp s8, s0
+; CHECK-GI-NEXT: csinv x9, x9, xzr, le
+; CHECK-GI-NEXT: csel x8, x10, x8, gt
+; CHECK-GI-NEXT: fcmp s8, s8
+; CHECK-GI-NEXT: csel x0, xzr, x9, vs
+; CHECK-GI-NEXT: csel x1, xzr, x8, vs
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i128 @llvm.fptosi.sat.i128.f32(float %f)
ret i128 %x
}
@@ -194,13 +265,23 @@ declare i100 @llvm.fptosi.sat.i100.f64(double)
declare i128 @llvm.fptosi.sat.i128.f64(double)
define i1 @test_signed_i1_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i1_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs w8, d0
-; CHECK-NEXT: ands w8, w8, w8, asr #31
-; CHECK-NEXT: csinv w8, w8, wzr, ge
-; CHECK-NEXT: and w0, w8, #0x1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i1_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs w8, d0
+; CHECK-SD-NEXT: ands w8, w8, w8, asr #31
+; CHECK-SD-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT: and w0, w8, #0x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i1_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzs w8, d0
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: csel w8, w8, wzr, lt
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: csinv w8, w8, wzr, ge
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%x = call i1 @llvm.fptosi.sat.i1.f64(double %f)
ret i1 %x
}
@@ -275,16 +356,27 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
}
define i50 @test_signed_i50_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i50_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x0, x8, x9, gt
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i50_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs x8, d0
+; CHECK-SD-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-NEXT: cmp x8, x9
+; CHECK-SD-NEXT: csel x8, x8, x9, lt
+; CHECK-SD-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-NEXT: cmp x8, x9
+; CHECK-SD-NEXT: csel x0, x8, x9, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i50_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzs x8, d0
+; CHECK-GI-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-NEXT: mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: csel x8, x8, x9, lt
+; CHECK-GI-NEXT: cmp x8, x10
+; CHECK-GI-NEXT: csel x0, x8, x10, gt
+; CHECK-GI-NEXT: ret
%x = call i50 @llvm.fptosi.sat.i50.f64(double %f)
ret i50 %x
}
@@ -299,59 +391,109 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
}
define i100 @test_signed_i100_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov d8, d0
-; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000
-; CHECK-NEXT: mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT: fcmp d8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: csel x8, x8, x1, lt
-; CHECK-NEXT: fcmp d8, d0
-; CHECK-NEXT: csel x8, x10, x8, gt
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: fcmp d8, d8
-; CHECK-NEXT: csel x0, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i100_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov d8, d0
+; CHECK-SD-NEXT: bl __fixdfti
+; CHECK-SD-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-SD-NEXT: fcmp d8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: csel x8, x8, x1, lt
+; CHECK-SD-NEXT: fcmp d8, d0
+; CHECK-SD-NEXT: csel x8, x10, x8, gt
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: fcmp d8, d8
+; CHECK-SD-NEXT: csel x0, xzr, x9, vs
+; CHECK-SD-NEXT: csel x1, xzr, x8, vs
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i100_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov d8, d0
+; CHECK-GI-NEXT: bl __fixdfti
+; CHECK-GI-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-GI-NEXT: mov x10, #34359738367 // =0x7ffffffff
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff
+; CHECK-GI-NEXT: fcmp d8, d0
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT: csel x9, xzr, x0, lt
+; CHECK-GI-NEXT: csel x8, x8, x1, lt
+; CHECK-GI-NEXT: fcmp d8, d0
+; CHECK-GI-NEXT: csinv x9, x9, xzr, le
+; CHECK-GI-NEXT: csel x8, x10, x8, gt
+; CHECK-GI-NEXT: fcmp d8, d8
+; CHECK-GI-NEXT: csel x0, xzr, x9, vs
+; CHECK-GI-NEXT: csel x1, xzr, x8, vs
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i100 @llvm.fptosi.sat.i100.f64(double %f)
ret i100 %x
}
define i128 @test_signed_i128_f64(double %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov d8, d0
-; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000
-; CHECK-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT: fcmp d8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: csel x8, x8, x1, lt
-; CHECK-NEXT: fcmp d8, d0
-; CHECK-NEXT: csel x8, x10, x8, gt
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: fcmp d8, d8
-; CHECK-NEXT: csel x0, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i128_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov d8, d0
+; CHECK-SD-NEXT: bl __fixdfti
+; CHECK-SD-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-SD-NEXT: fcmp d8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: csel x8, x8, x1, lt
+; CHECK-SD-NEXT: fcmp d8, d0
+; CHECK-SD-NEXT: csel x8, x10, x8, gt
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: fcmp d8, d8
+; CHECK-SD-NEXT: csel x0, xzr, x9, vs
+; CHECK-SD-NEXT: csel x1, xzr, x8, vs
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_i128_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov d8, d0
+; CHECK-GI-NEXT: bl __fixdfti
+; CHECK-GI-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-GI-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff
+; CHECK-GI-NEXT: fcmp d8, d0
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: csel x9, xzr, x0, lt
+; CHECK-GI-NEXT: csel x8, x8, x1, lt
+; CHECK-GI-NEXT: fcmp d8, d0
+; CHECK-GI-NEXT: csinv x9, x9, xzr, le
+; CHECK-GI-NEXT: csel x8, x10, x8, gt
+; CHECK-GI-NEXT: fcmp d8, d8
+; CHECK-GI-NEXT: csel x0, xzr, x9, vs
+; CHECK-GI-NEXT: csel x1, xzr, x8, vs
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i128 @llvm.fptosi.sat.i128.f64(double %f)
ret i128 %x
}
@@ -372,245 +514,429 @@ declare i100 @llvm.fptosi.sat.i100.f16(half)
declare i128 @llvm.fptosi.sat.i128.f16(half)
define i1 @test_signed_i1_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i1_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs w8, s0
-; CHECK-CVT-NEXT: ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: and w0, w8, #0x1
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i1_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: ands w8, w8, w8, asr #31
-; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge
-; CHECK-FP16-NEXT: and w0, w8, #0x1
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i1_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT: ands w8, w8, w8, asr #31
+; CHECK-SD-CVT-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-CVT-NEXT: and w0, w8, #0x1
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i1_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT: ands w8, w8, w8, asr #31
+; CHECK-SD-FP16-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-FP16-NEXT: and w0, w8, #0x1
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i1_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, #0
+; CHECK-GI-CVT-NEXT: csel w8, w8, wzr, lt
+; CHECK-GI-CVT-NEXT: cmp w8, #0
+; CHECK-GI-CVT-NEXT: csinv w8, w8, wzr, ge
+; CHECK-GI-CVT-NEXT: and w0, w8, #0x1
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i1_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT: cmp w8, #0
+; CHECK-GI-FP16-NEXT: csel w8, w8, wzr, lt
+; CHECK-GI-FP16-NEXT: cmp w8, #0
+; CHECK-GI-FP16-NEXT: csinv w8, w8, wzr, ge
+; CHECK-GI-FP16-NEXT: and w0, w8, #0x1
+; CHECK-GI-FP16-NEXT: ret
%x = call i1 @llvm.fptosi.sat.i1.f16(half %f)
ret i1 %x
}
define i8 @test_signed_i8_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i8_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w8, #127 // =0x7f
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: csel w8, w9, w8, lt
-; CHECK-CVT-NEXT: mov w9, #-128 // =0xffffff80
-; CHECK-CVT-NEXT: cmn w8, #128
-; CHECK-CVT-NEXT: csel w0, w8, w9, gt
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i8_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov w8, #127 // =0x7f
-; CHECK-FP16-NEXT: cmp w9, #127
-; CHECK-FP16-NEXT: csel w8, w9, w8, lt
-; CHECK-FP16-NEXT: mov w9, #-128 // =0xffffff80
-; CHECK-FP16-NEXT: cmn w8, #128
-; CHECK-FP16-NEXT: csel w0, w8, w9, gt
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i8_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w8, #127 // =0x7f
+; CHECK-SD-CVT-NEXT: fcvtzs w9, s0
+; CHECK-SD-CVT-NEXT: cmp w9, #127
+; CHECK-SD-CVT-NEXT: csel w8, w9, w8, lt
+; CHECK-SD-CVT-NEXT: mov w9, #-128 // =0xffffff80
+; CHECK-SD-CVT-NEXT: cmn w8, #128
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i8_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs w9, h0
+; CHECK-SD-FP16-NEXT: mov w8, #127 // =0x7f
+; CHECK-SD-FP16-NEXT: cmp w9, #127
+; CHECK-SD-FP16-NEXT: csel w8, w9, w8, lt
+; CHECK-SD-FP16-NEXT: mov w9, #-128 // =0xffffff80
+; CHECK-SD-FP16-NEXT: cmn w8, #128
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i8_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w8, #127 // =0x7f
+; CHECK-GI-CVT-NEXT: fcvtzs w9, s0
+; CHECK-GI-CVT-NEXT: cmp w9, #127
+; CHECK-GI-CVT-NEXT: csel w8, w9, w8, lt
+; CHECK-GI-CVT-NEXT: mov w9, #-128 // =0xffffff80
+; CHECK-GI-CVT-NEXT: cmn w8, #128
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i8_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs w9, h0
+; CHECK-GI-FP16-NEXT: mov w8, #127 // =0x7f
+; CHECK-GI-FP16-NEXT: cmp w9, #127
+; CHECK-GI-FP16-NEXT: csel w8, w9, w8, lt
+; CHECK-GI-FP16-NEXT: mov w9, #-128 // =0xffffff80
+; CHECK-GI-FP16-NEXT: cmn w8, #128
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT: ret
%x = call i8 @llvm.fptosi.sat.i8.f16(half %f)
ret i8 %x
}
define i13 @test_signed_i13_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i13_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w8, #4095 // =0xfff
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: cmp w9, #4095
-; CHECK-CVT-NEXT: csel w8, w9, w8, lt
-; CHECK-CVT-NEXT: mov w9, #-4096 // =0xfffff000
-; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w0, w8, w9, gt
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i13_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov w8, #4095 // =0xfff
-; CHECK-FP16-NEXT: cmp w9, #4095
-; CHECK-FP16-NEXT: csel w8, w9, w8, lt
-; CHECK-FP16-NEXT: mov w9, #-4096 // =0xfffff000
-; CHECK-FP16-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: csel w0, w8, w9, gt
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i13_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w8, #4095 // =0xfff
+; CHECK-SD-CVT-NEXT: fcvtzs w9, s0
+; CHECK-SD-CVT-NEXT: cmp w9, #4095
+; CHECK-SD-CVT-NEXT: csel w8, w9, w8, lt
+; CHECK-SD-CVT-NEXT: mov w9, #-4096 // =0xfffff000
+; CHECK-SD-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i13_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs w9, h0
+; CHECK-SD-FP16-NEXT: mov w8, #4095 // =0xfff
+; CHECK-SD-FP16-NEXT: cmp w9, #4095
+; CHECK-SD-FP16-NEXT: csel w8, w9, w8, lt
+; CHECK-SD-FP16-NEXT: mov w9, #-4096 // =0xfffff000
+; CHECK-SD-FP16-NEXT: cmn w8, #1, lsl #12 // =4096
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i13_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w8, #4095 // =0xfff
+; CHECK-GI-CVT-NEXT: fcvtzs w9, s0
+; CHECK-GI-CVT-NEXT: cmp w9, #4095
+; CHECK-GI-CVT-NEXT: csel w8, w9, w8, lt
+; CHECK-GI-CVT-NEXT: mov w9, #-4096 // =0xfffff000
+; CHECK-GI-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i13_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs w9, h0
+; CHECK-GI-FP16-NEXT: mov w8, #4095 // =0xfff
+; CHECK-GI-FP16-NEXT: cmp w9, #4095
+; CHECK-GI-FP16-NEXT: csel w8, w9, w8, lt
+; CHECK-GI-FP16-NEXT: mov w9, #-4096 // =0xfffff000
+; CHECK-GI-FP16-NEXT: cmn w8, #1, lsl #12 // =4096
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT: ret
%x = call i13 @llvm.fptosi.sat.i13.f16(half %f)
ret i13 %x
}
define i16 @test_signed_i16_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i16_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #32767 // =0x7fff
-; CHECK-CVT-NEXT: fcvtzs w8, s0
-; CHECK-CVT-NEXT: cmp w8, w9
-; CHECK-CVT-NEXT: csel w8, w8, w9, lt
-; CHECK-CVT-NEXT: mov w9, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w0, w8, w9, gt
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i16_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: mov w9, #32767 // =0x7fff
-; CHECK-FP16-NEXT: cmp w8, w9
-; CHECK-FP16-NEXT: csel w8, w8, w9, lt
-; CHECK-FP16-NEXT: mov w9, #-32768 // =0xffff8000
-; CHECK-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-FP16-NEXT: csel w0, w8, w9, gt
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i16_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w9, #32767 // =0x7fff
+; CHECK-SD-CVT-NEXT: fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT: cmp w8, w9
+; CHECK-SD-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-SD-CVT-NEXT: mov w9, #-32768 // =0xffff8000
+; CHECK-SD-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT: mov w9, #32767 // =0x7fff
+; CHECK-SD-FP16-NEXT: cmp w8, w9
+; CHECK-SD-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-SD-FP16-NEXT: mov w9, #-32768 // =0xffff8000
+; CHECK-SD-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i16_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w9, #32767 // =0x7fff
+; CHECK-GI-CVT-NEXT: fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, w9
+; CHECK-GI-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-CVT-NEXT: mov w9, #-32768 // =0xffff8000
+; CHECK-GI-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i16_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT: mov w9, #32767 // =0x7fff
+; CHECK-GI-FP16-NEXT: cmp w8, w9
+; CHECK-GI-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-FP16-NEXT: mov w9, #-32768 // =0xffff8000
+; CHECK-GI-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT: ret
%x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
ret i16 %x
}
define i19 @test_signed_i19_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i19_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #262143 // =0x3ffff
-; CHECK-CVT-NEXT: fcvtzs w8, s0
-; CHECK-CVT-NEXT: cmp w8, w9
-; CHECK-CVT-NEXT: csel w8, w8, w9, lt
-; CHECK-CVT-NEXT: mov w9, #-262144 // =0xfffc0000
-; CHECK-CVT-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: csel w0, w8, w9, gt
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i19_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: mov w9, #262143 // =0x3ffff
-; CHECK-FP16-NEXT: cmp w8, w9
-; CHECK-FP16-NEXT: csel w8, w8, w9, lt
-; CHECK-FP16-NEXT: mov w9, #-262144 // =0xfffc0000
-; CHECK-FP16-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w0, w8, w9, gt
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i19_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w9, #262143 // =0x3ffff
+; CHECK-SD-CVT-NEXT: fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT: cmp w8, w9
+; CHECK-SD-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-SD-CVT-NEXT: mov w9, #-262144 // =0xfffc0000
+; CHECK-SD-CVT-NEXT: cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i19_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
+; CHECK-SD-FP16-NEXT: mov w9, #262143 // =0x3ffff
+; CHECK-SD-FP16-NEXT: cmp w8, w9
+; CHECK-SD-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-SD-FP16-NEXT: mov w9, #-262144 // =0xfffc0000
+; CHECK-SD-FP16-NEXT: cmn w8, #64, lsl #12 // =262144
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i19_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w9, #262143 // =0x3ffff
+; CHECK-GI-CVT-NEXT: fcvtzs w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, w9
+; CHECK-GI-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-CVT-NEXT: mov w9, #-262144 // =0xfffc0000
+; CHECK-GI-CVT-NEXT: cmn w8, #64, lsl #12 // =262144
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i19_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs w8, h0
+; CHECK-GI-FP16-NEXT: mov w9, #262143 // =0x3ffff
+; CHECK-GI-FP16-NEXT: cmp w8, w9
+; CHECK-GI-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-FP16-NEXT: mov w9, #-262144 // =0xfffc0000
+; CHECK-GI-FP16-NEXT: cmn w8, #64, lsl #12 // =262144
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT: ret
%x = call i19 @llvm.fptosi.sat.i19.f16(half %f)
ret i19 %x
}
define i32 @test_signed_i32_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i32_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs w0, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i32_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs w0, h0
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i32_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: fcvtzs w0, s0
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i32_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs w0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i32_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzs w0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i32_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT: ret
%x = call i32 @llvm.fptosi.sat.i32.f16(half %f)
ret i32 %x
}
define i50 @test_signed_i50_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i50_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-CVT-NEXT: fcvtzs x8, s0
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csel x8, x8, x9, lt
-; CHECK-CVT-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csel x0, x8, x9, gt
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i50_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs x8, h0
-; CHECK-FP16-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csel x8, x8, x9, lt
-; CHECK-FP16-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csel x0, x8, x9, gt
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i50_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-CVT-NEXT: fcvtzs x8, s0
+; CHECK-SD-CVT-NEXT: cmp x8, x9
+; CHECK-SD-CVT-NEXT: csel x8, x8, x9, lt
+; CHECK-SD-CVT-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-CVT-NEXT: cmp x8, x9
+; CHECK-SD-CVT-NEXT: csel x0, x8, x9, gt
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i50_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs x8, h0
+; CHECK-SD-FP16-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-SD-FP16-NEXT: cmp x8, x9
+; CHECK-SD-FP16-NEXT: csel x8, x8, x9, lt
+; CHECK-SD-FP16-NEXT: mov x9, #-562949953421312 // =0xfffe000000000000
+; CHECK-SD-FP16-NEXT: cmp x8, x9
+; CHECK-SD-FP16-NEXT: csel x0, x8, x9, gt
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i50_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-CVT-NEXT: mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-CVT-NEXT: fcvtzs x8, s0
+; CHECK-GI-CVT-NEXT: cmp x8, x9
+; CHECK-GI-CVT-NEXT: csel x8, x8, x9, lt
+; CHECK-GI-CVT-NEXT: cmp x8, x10
+; CHECK-GI-CVT-NEXT: csel x0, x8, x10, gt
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i50_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs x8, h0
+; CHECK-GI-FP16-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
+; CHECK-GI-FP16-NEXT: mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-GI-FP16-NEXT: cmp x8, x9
+; CHECK-GI-FP16-NEXT: csel x8, x8, x9, lt
+; CHECK-GI-FP16-NEXT: cmp x8, x10
+; CHECK-GI-FP16-NEXT: csel x0, x8, x10, gt
+; CHECK-GI-FP16-NEXT: ret
%x = call i50 @llvm.fptosi.sat.i50.f16(half %f)
ret i50 %x
}
define i64 @test_signed_i64_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_signed_i64_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs x0, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_i64_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs x0, h0
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_signed_i64_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: fcvtzs x0, s0
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_signed_i64_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzs x0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i64_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i64_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT: ret
%x = call i64 @llvm.fptosi.sat.i64.f16(half %f)
ret i64 %x
}
define i100 @test_signed_i100_f16(half %f) nounwind {
-; CHECK-LABEL: test_signed_i100_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: movi v0.2s, #241, lsl #24
-; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT: mov x10, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: csel x8, x8, x1, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csel x8, x10, x8, gt
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x0, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i100_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: movi v0.2s, #241, lsl #24
+; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: csel x8, x8, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csel x8, x10, x8, gt
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: csel x0, xzr, x9, vs
+; CHECK-SD-NEXT: csel x1, xzr, x8, vs
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i100_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov x1, xzr
+; CHECK-GI-CVT-NEXT: fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i100_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT: mov x1, xzr
+; CHECK-GI-FP16-NEXT: ret
%x = call i100 @llvm.fptosi.sat.i100.f16(half %f)
ret i100 %x
}
define i128 @test_signed_i128_f16(half %f) nounwind {
-; CHECK-LABEL: test_signed_i128_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: movi v0.2s, #255, lsl #24
-; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: csel x8, x8, x1, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csel x8, x10, x8, gt
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x0, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_i128_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: movi v0.2s, #255, lsl #24
+; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: csel x8, x8, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csel x8, x10, x8, gt
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: csel x0, xzr, x9, vs
+; CHECK-SD-NEXT: csel x1, xzr, x8, vs
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_i128_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov x1, xzr
+; CHECK-GI-CVT-NEXT: fcvtzs x0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_i128_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT: mov x1, xzr
+; CHECK-GI-FP16-NEXT: ret
%x = call i128 @llvm.fptosi.sat.i128.f16(half %f)
ret i128 %x
}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 1e1e7327f71fdc..ce6eba88838349 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
;
; 32-bit float to unsigned integer
@@ -18,12 +20,20 @@ declare i100 @llvm.fptoui.sat.i100.f32(float)
declare i128 @llvm.fptoui.sat.i128.f32(float)
define i1 @test_unsigned_i1_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i1_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: csinc w0, w8, wzr, lo
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i1_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu w8, s0
+; CHECK-SD-NEXT: cmp w8, #1
+; CHECK-SD-NEXT: csinc w0, w8, wzr, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_i1_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzu w8, s0
+; CHECK-GI-NEXT: cmp w8, #1
+; CHECK-GI-NEXT: csinc w8, w8, wzr, lo
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
ret i1 %x
}
@@ -107,46 +117,83 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind {
}
define i100 @test_unsigned_i100_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s8, s0
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csel x1, x10, x9, gt
-; CHECK-NEXT: csinv x0, x8, xzr, le
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i100_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s8, s0
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, xzr, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csel x1, x10, x9, gt
+; CHECK-SD-NEXT: csinv x0, x8, xzr, le
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_i100_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov s8, s0
+; CHECK-GI-NEXT: bl __fixunssfti
+; CHECK-GI-NEXT: mov w8, #1904214015 // =0x717fffff
+; CHECK-GI-NEXT: fcmp s8, #0.0
+; CHECK-GI-NEXT: mov x10, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: csel x8, xzr, x0, lt
+; CHECK-GI-NEXT: csel x9, xzr, x1, lt
+; CHECK-GI-NEXT: fcmp s8, s0
+; CHECK-GI-NEXT: csinv x0, x8, xzr, le
+; CHECK-GI-NEXT: csel x1, x10, x9, gt
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i100 @llvm.fptoui.sat.i100.f32(float %f)
ret i100 %x
}
define i128 @test_unsigned_i128_f32(float %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s8, s0
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csinv x0, x9, xzr, le
-; CHECK-NEXT: csinv x1, x8, xzr, le
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i128_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s8, s0
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csinv x0, x9, xzr, le
+; CHECK-SD-NEXT: csinv x1, x8, xzr, le
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_i128_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov s8, s0
+; CHECK-GI-NEXT: bl __fixunssfti
+; CHECK-GI-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-GI-NEXT: fcmp s8, #0.0
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: csel x8, xzr, x0, lt
+; CHECK-GI-NEXT: csel x9, xzr, x1, lt
+; CHECK-GI-NEXT: fcmp s8, s0
+; CHECK-GI-NEXT: csinv x0, x8, xzr, le
+; CHECK-GI-NEXT: csinv x1, x9, xzr, le
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i128 @llvm.fptoui.sat.i128.f32(float %f)
ret i128 %x
}
@@ -167,12 +214,20 @@ declare i100 @llvm.fptoui.sat.i100.f64(double)
declare i128 @llvm.fptoui.sat.i128.f64(double)
define i1 @test_unsigned_i1_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i1_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu w8, d0
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: csinc w0, w8, wzr, lo
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i1_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu w8, d0
+; CHECK-SD-NEXT: cmp w8, #1
+; CHECK-SD-NEXT: csinc w0, w8, wzr, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_i1_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzu w8, d0
+; CHECK-GI-NEXT: cmp w8, #1
+; CHECK-GI-NEXT: csinc w8, w8, wzr, lo
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
ret i1 %x
}
@@ -256,46 +311,83 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind {
}
define i100 @test_unsigned_i100_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov d8, d0
-; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff
-; CHECK-NEXT: fcmp d8, #0.0
-; CHECK-NEXT: mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp d8, d0
-; CHECK-NEXT: csel x1, x10, x9, gt
-; CHECK-NEXT: csinv x0, x8, xzr, le
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i100_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov d8, d0
+; CHECK-SD-NEXT: bl __fixunsdfti
+; CHECK-SD-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-SD-NEXT: fcmp d8, #0.0
+; CHECK-SD-NEXT: mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, xzr, x1, lt
+; CHECK-SD-NEXT: fcmp d8, d0
+; CHECK-SD-NEXT: csel x1, x10, x9, gt
+; CHECK-SD-NEXT: csinv x0, x8, xzr, le
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_i100_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov d8, d0
+; CHECK-GI-NEXT: bl __fixunsdfti
+; CHECK-GI-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff
+; CHECK-GI-NEXT: fcmp d8, #0.0
+; CHECK-GI-NEXT: mov x10, #68719476735 // =0xfffffffff
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: csel x8, xzr, x0, lt
+; CHECK-GI-NEXT: csel x9, xzr, x1, lt
+; CHECK-GI-NEXT: fcmp d8, d0
+; CHECK-GI-NEXT: csinv x0, x8, xzr, le
+; CHECK-GI-NEXT: csel x1, x10, x9, gt
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i100 @llvm.fptoui.sat.i100.f64(double %f)
ret i100 %x
}
define i128 @test_unsigned_i128_f64(double %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov d8, d0
-; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
-; CHECK-NEXT: fcmp d8, #0.0
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp d8, d0
-; CHECK-NEXT: csinv x0, x9, xzr, le
-; CHECK-NEXT: csinv x1, x8, xzr, le
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i128_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov d8, d0
+; CHECK-SD-NEXT: bl __fixunsdfti
+; CHECK-SD-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-SD-NEXT: fcmp d8, #0.0
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp d8, d0
+; CHECK-SD-NEXT: csinv x0, x9, xzr, le
+; CHECK-SD-NEXT: csinv x1, x8, xzr, le
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_i128_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov d8, d0
+; CHECK-GI-NEXT: bl __fixunsdfti
+; CHECK-GI-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
+; CHECK-GI-NEXT: fcmp d8, #0.0
+; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: csel x8, xzr, x0, lt
+; CHECK-GI-NEXT: csel x9, xzr, x1, lt
+; CHECK-GI-NEXT: fcmp d8, d0
+; CHECK-GI-NEXT: csinv x0, x8, xzr, le
+; CHECK-GI-NEXT: csinv x1, x9, xzr, le
+; CHECK-GI-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%x = call i128 @llvm.fptoui.sat.i128.f64(double %f)
ret i128 %x
}
@@ -316,202 +408,352 @@ declare i100 @llvm.fptoui.sat.i100.f16(half)
declare i128 @llvm.fptoui.sat.i128.f16(half)
define i1 @test_unsigned_i1_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i1_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzu w8, s0
-; CHECK-CVT-NEXT: cmp w8, #1
-; CHECK-CVT-NEXT: csinc w0, w8, wzr, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i1_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: cmp w8, #1
-; CHECK-FP16-NEXT: csinc w0, w8, wzr, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i1_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT: cmp w8, #1
+; CHECK-SD-CVT-NEXT: csinc w0, w8, wzr, lo
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i1_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT: cmp w8, #1
+; CHECK-SD-FP16-NEXT: csinc w0, w8, wzr, lo
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i1_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, #1
+; CHECK-GI-CVT-NEXT: csinc w8, w8, wzr, lo
+; CHECK-GI-CVT-NEXT: and w0, w8, #0x1
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i1_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT: cmp w8, #1
+; CHECK-GI-FP16-NEXT: csinc w8, w8, wzr, lo
+; CHECK-GI-FP16-NEXT: and w0, w8, #0x1
+; CHECK-GI-FP16-NEXT: ret
%x = call i1 @llvm.fptoui.sat.i1.f16(half %f)
ret i1 %x
}
define i8 @test_unsigned_i8_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i8_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w8, #255 // =0xff
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: csel w0, w9, w8, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i8_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov w8, #255 // =0xff
-; CHECK-FP16-NEXT: cmp w9, #255
-; CHECK-FP16-NEXT: csel w0, w9, w8, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i8_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w8, #255 // =0xff
+; CHECK-SD-CVT-NEXT: fcvtzu w9, s0
+; CHECK-SD-CVT-NEXT: cmp w9, #255
+; CHECK-SD-CVT-NEXT: csel w0, w9, w8, lo
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i8_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu w9, h0
+; CHECK-SD-FP16-NEXT: mov w8, #255 // =0xff
+; CHECK-SD-FP16-NEXT: cmp w9, #255
+; CHECK-SD-FP16-NEXT: csel w0, w9, w8, lo
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i8_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-CVT-NEXT: fcvtzu w9, s0
+; CHECK-GI-CVT-NEXT: cmp w9, #255
+; CHECK-GI-CVT-NEXT: csel w0, w9, w8, lo
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i8_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu w9, h0
+; CHECK-GI-FP16-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-FP16-NEXT: cmp w9, #255
+; CHECK-GI-FP16-NEXT: csel w0, w9, w8, lo
+; CHECK-GI-FP16-NEXT: ret
%x = call i8 @llvm.fptoui.sat.i8.f16(half %f)
ret i8 %x
}
define i13 @test_unsigned_i13_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i13_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #8191 // =0x1fff
-; CHECK-CVT-NEXT: fcvtzu w8, s0
-; CHECK-CVT-NEXT: cmp w8, w9
-; CHECK-CVT-NEXT: csel w0, w8, w9, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i13_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov w9, #8191 // =0x1fff
-; CHECK-FP16-NEXT: cmp w8, w9
-; CHECK-FP16-NEXT: csel w0, w8, w9, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i13_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w9, #8191 // =0x1fff
+; CHECK-SD-CVT-NEXT: fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT: cmp w8, w9
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i13_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT: mov w9, #8191 // =0x1fff
+; CHECK-SD-FP16-NEXT: cmp w8, w9
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i13_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w9, #8191 // =0x1fff
+; CHECK-GI-CVT-NEXT: fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, w9
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i13_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT: mov w9, #8191 // =0x1fff
+; CHECK-GI-FP16-NEXT: cmp w8, w9
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT: ret
%x = call i13 @llvm.fptoui.sat.i13.f16(half %f)
ret i13 %x
}
define i16 @test_unsigned_i16_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i16_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
-; CHECK-CVT-NEXT: fcvtzu w8, s0
-; CHECK-CVT-NEXT: cmp w8, w9
-; CHECK-CVT-NEXT: csel w0, w8, w9, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i16_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
-; CHECK-FP16-NEXT: cmp w8, w9
-; CHECK-FP16-NEXT: csel w0, w8, w9, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i16_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w9, #65535 // =0xffff
+; CHECK-SD-CVT-NEXT: fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT: cmp w8, w9
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT: mov w9, #65535 // =0xffff
+; CHECK-SD-FP16-NEXT: cmp w8, w9
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w9, #65535 // =0xffff
+; CHECK-GI-CVT-NEXT: fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, w9
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i16_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT: mov w9, #65535 // =0xffff
+; CHECK-GI-FP16-NEXT: cmp w8, w9
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT: ret
%x = call i16 @llvm.fptoui.sat.i16.f16(half %f)
ret i16 %x
}
define i19 @test_unsigned_i19_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i19_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #524287 // =0x7ffff
-; CHECK-CVT-NEXT: fcvtzu w8, s0
-; CHECK-CVT-NEXT: cmp w8, w9
-; CHECK-CVT-NEXT: csel w0, w8, w9, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i19_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov w9, #524287 // =0x7ffff
-; CHECK-FP16-NEXT: cmp w8, w9
-; CHECK-FP16-NEXT: csel w0, w8, w9, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i19_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov w9, #524287 // =0x7ffff
+; CHECK-SD-CVT-NEXT: fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT: cmp w8, w9
+; CHECK-SD-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i19_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
+; CHECK-SD-FP16-NEXT: mov w9, #524287 // =0x7ffff
+; CHECK-SD-FP16-NEXT: cmp w8, w9
+; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i19_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov w9, #524287 // =0x7ffff
+; CHECK-GI-CVT-NEXT: fcvtzu w8, s0
+; CHECK-GI-CVT-NEXT: cmp w8, w9
+; CHECK-GI-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i19_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu w8, h0
+; CHECK-GI-FP16-NEXT: mov w9, #524287 // =0x7ffff
+; CHECK-GI-FP16-NEXT: cmp w8, w9
+; CHECK-GI-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT: ret
%x = call i19 @llvm.fptoui.sat.i19.f16(half %f)
ret i19 %x
}
define i32 @test_unsigned_i32_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i32_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzu w0, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i32_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu w0, h0
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i32_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: fcvtzu w0, s0
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i32_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu w0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i32_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzu w0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i32_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT: ret
%x = call i32 @llvm.fptoui.sat.i32.f16(half %f)
ret i32 %x
}
define i50 @test_unsigned_i50_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i50_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
-; CHECK-CVT-NEXT: fcvtzu x8, s0
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csel x0, x8, x9, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i50_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu x8, h0
-; CHECK-FP16-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csel x0, x8, x9, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i50_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-CVT-NEXT: fcvtzu x8, s0
+; CHECK-SD-CVT-NEXT: cmp x8, x9
+; CHECK-SD-CVT-NEXT: csel x0, x8, x9, lo
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i50_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu x8, h0
+; CHECK-SD-FP16-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-SD-FP16-NEXT: cmp x8, x9
+; CHECK-SD-FP16-NEXT: csel x0, x8, x9, lo
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i50_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-CVT-NEXT: fcvtzu x8, s0
+; CHECK-GI-CVT-NEXT: cmp x8, x9
+; CHECK-GI-CVT-NEXT: csel x0, x8, x9, lo
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i50_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu x8, h0
+; CHECK-GI-FP16-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
+; CHECK-GI-FP16-NEXT: cmp x8, x9
+; CHECK-GI-FP16-NEXT: csel x0, x8, x9, lo
+; CHECK-GI-FP16-NEXT: ret
%x = call i50 @llvm.fptoui.sat.i50.f16(half %f)
ret i50 %x
}
define i64 @test_unsigned_i64_f16(half %f) nounwind {
-; CHECK-CVT-LABEL: test_unsigned_i64_f16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzu x0, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_i64_f16:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzu x0, h0
-; CHECK-FP16-NEXT: ret
+; CHECK-SD-CVT-LABEL: test_unsigned_i64_f16:
+; CHECK-SD-CVT: // %bb.0:
+; CHECK-SD-CVT-NEXT: fcvt s0, h0
+; CHECK-SD-CVT-NEXT: fcvtzu x0, s0
+; CHECK-SD-CVT-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: test_unsigned_i64_f16:
+; CHECK-SD-FP16: // %bb.0:
+; CHECK-SD-FP16-NEXT: fcvtzu x0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i64_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i64_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT: ret
%x = call i64 @llvm.fptoui.sat.i64.f16(half %f)
ret i64 %x
}
define i100 @test_unsigned_i100_f16(half %f) nounwind {
-; CHECK-LABEL: test_unsigned_i100_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov x10, #68719476735 // =0xfffffffff
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csel x1, x10, x9, gt
-; CHECK-NEXT: csinv x0, x8, xzr, le
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i100_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov x10, #68719476735 // =0xfffffffff
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, xzr, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csel x1, x10, x9, gt
+; CHECK-SD-NEXT: csinv x0, x8, xzr, le
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i100_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov x1, xzr
+; CHECK-GI-CVT-NEXT: fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i100_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT: mov x1, xzr
+; CHECK-GI-FP16-NEXT: ret
%x = call i100 @llvm.fptoui.sat.i100.f16(half %f)
ret i100 %x
}
define i128 @test_unsigned_i128_f16(half %f) nounwind {
-; CHECK-LABEL: test_unsigned_i128_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s0
-; CHECK-NEXT: csinv x0, x9, xzr, le
-; CHECK-NEXT: csinv x1, x8, xzr, le
-; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_i128_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s0
+; CHECK-SD-NEXT: csinv x0, x9, xzr, le
+; CHECK-SD-NEXT: csinv x1, x8, xzr, le
+; CHECK-SD-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_i128_f16:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: mov x1, xzr
+; CHECK-GI-CVT-NEXT: fcvtzu x0, s0
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_i128_f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT: mov x1, xzr
+; CHECK-GI-FP16-NEXT: ret
%x = call i128 @llvm.fptoui.sat.i128.f16(half %f)
ret i128 %x
}
diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td
index 853831366fa531..b9aea33ac96aaa 100644
--- a/llvm/test/TableGen/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter.td
@@ -513,7 +513,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
// R00O-NEXT: GIM_Reject,
// R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
// R00O-NEXT: GIM_Reject,
-// R00O-NEXT: }; // Size: 1816 bytes
+// R00O-NEXT: }; // Size: 1824 bytes
def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
[(set GPR32:$dst,
>From 627e6161ef872ef18da188be62c040f1cbfd74f5 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 5 Sep 2024 19:31:41 +0100
Subject: [PATCH 2/3] Add tablegen patterns
---
.../Target/GlobalISel/SelectionDAGCompat.td | 4 +--
.../include/llvm/Target/TargetSelectionDAG.td | 2 ++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 36 +++++++++++++++++--
.../GISel/AArch64InstructionSelector.cpp | 6 ----
4 files changed, 37 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 627c57429941ea..bbe8aa93bf32b6 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -98,8 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
def : GINodeEquiv<G_SITOFP, sint_to_fp>;
def : GINodeEquiv<G_UITOFP, uint_to_fp>;
-def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat>;
-def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat>;
+def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat_gi>;
+def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat_gi>;
def : GINodeEquiv<G_FADD, fadd>;
def : GINodeEquiv<G_FSUB, fsub>;
def : GINodeEquiv<G_FMA, fma>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index dd79002dcbdb48..abb1cc205358f7 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -569,6 +569,8 @@ def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
+def fp_to_sint_sat_gi : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntOp>;
+def fp_to_uint_sat_gi : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntOp>;
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ccef85bfaa8afc..32fa4af9ca7171 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4724,7 +4724,7 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
@@ -4740,6 +4740,21 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -4754,10 +4769,25 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
-defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index accadfd352f30e..18361cf3685642 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2152,12 +2152,6 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
}
return false;
}
- case TargetOpcode::G_FPTOSI_SAT:
- I.setDesc(TII.get(TargetOpcode::G_FPTOSI));
- return true;
- case TargetOpcode::G_FPTOUI_SAT:
- I.setDesc(TII.get(TargetOpcode::G_FPTOUI));
- return true;
default:
return false;
}
>From 33d0336e1c923ca39ec6eebd8bcbf03e044cf288 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 6 Sep 2024 17:08:46 +0100
Subject: [PATCH 3/3] Remove Min/Max legality check and add test case for it.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 25 +++---
.../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 86 +++++++++++++++++++
.../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 71 +++++++++++++++
3 files changed, 171 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 44d28fff47a2b4..9ceae696f5ddec 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7364,19 +7364,22 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
!(MaxStatus & APFloat::opStatus::opInexact);
- // If the integer bounds are exactly representable as floats and min/max are
- // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
- // of comparisons and selects.
- bool MinMaxLegal = LI.isLegal({TargetOpcode::G_FMINNUM, SrcTy}) &&
- LI.isLegal({TargetOpcode::G_FMAXNUM, SrcTy});
- if (AreExactFloatBounds && MinMaxLegal) {
+ // If the integer bounds are exactly representable as floats, emit a
+ // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
+ // and selects.
+ if (AreExactFloatBounds) {
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
- auto Max = MIRBuilder.buildFMaxNum(
- SrcTy, Src, MIRBuilder.buildFConstant(SrcTy, MinFloat));
+ auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
+ auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+ SrcTy.changeElementSize(1), Src, MaxC);
+ auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
// Clamp by MaxFloat from above. NaN cannot occur.
- auto Min = MIRBuilder.buildFMinNum(
- SrcTy, Max, MIRBuilder.buildFConstant(SrcTy, MaxFloat),
- MachineInstr::FmNoNans);
+ auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
+ auto MinP =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
+ MinC, MachineInstr::FmNoNans);
+ auto Min =
+ MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
// Convert clamped value to integer. In the unsigned case we're done,
// because we mapped NaN to MinFloat, which will cast to zero.
if (!IsSigned) {
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index 588cfca431efe8..9c52b024d3e259 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -940,3 +940,89 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
%x = call i128 @llvm.fptosi.sat.i128.f16(half %f)
ret i128 %x
}
+
+define i32 @test_signed_f128_i32(fp128 %f) {
+; CHECK-SD-LABEL: test_signed_f128_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: adrp x8, .LCPI30_0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-SD-NEXT: bl __getf2
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov w19, w0
+; CHECK-SD-NEXT: bl __fixtfsi
+; CHECK-SD-NEXT: cmp w19, #0
+; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: csel w19, w8, w0, lt
+; CHECK-SD-NEXT: adrp x8, .LCPI30_1
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT: bl __gttf2
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csel w19, w8, w19, gt
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csel w0, wzr, w19, ne
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_signed_f128_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: adrp x8, .LCPI30_1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: mov d0, v2.d[1]
+; CHECK-GI-NEXT: mov d1, v3.d[1]
+; CHECK-GI-NEXT: fcsel d8, d2, d3, lt
+; CHECK-GI-NEXT: fmov x8, d8
+; CHECK-GI-NEXT: fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d9
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: adrp x8, .LCPI30_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: mov d0, v1.d[1]
+; CHECK-GI-NEXT: fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: bl __fixtfsi
+; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov w19, w0
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: csel w0, wzr, w19, ne
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
+ %x = call i32 @llvm.fptosi.sat.i32.f128(fp128 %f)
+ ret i32 %x
+}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index ce6eba88838349..60f961fa8f9443 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -757,3 +757,74 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
%x = call i128 @llvm.fptoui.sat.i128.f16(half %f)
ret i128 %x
}
+
+define i32 @test_unsigned_f128_i32(fp128 %f) {
+; CHECK-SD-LABEL: test_unsigned_f128_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: adrp x8, .LCPI30_0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-SD-NEXT: bl __getf2
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov w19, w0
+; CHECK-SD-NEXT: bl __fixunstfsi
+; CHECK-SD-NEXT: adrp x8, .LCPI30_1
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cmp w19, #0
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT: csel w19, wzr, w0, lt
+; CHECK-SD-NEXT: bl __gttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csinv w0, w19, wzr, le
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsigned_f128_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: adrp x8, .LCPI30_1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: mov d0, v3.d[1]
+; CHECK-GI-NEXT: mov d1, v2.d[1]
+; CHECK-GI-NEXT: fcsel d8, d3, d2, lt
+; CHECK-GI-NEXT: fmov x8, d8
+; CHECK-GI-NEXT: fcsel d9, d0, d1, lt
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d9
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: adrp x8, .LCPI30_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov d0, v1.d[1]
+; CHECK-GI-NEXT: fcsel d1, d8, d1, gt
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: fcsel d2, d9, d0, gt
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: b __fixunstfsi
+ %x = call i32 @llvm.fptoui.sat.i32.f128(fp128 %f)
+ ret i32 %x
+}
More information about the llvm-commits mailing list