[llvm] [AArch64][GlobalISel] SIMD fpcvt codegen for fptoi(_sat) (PR #160831)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 01:38:10 PDT 2025
https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/160831
This is a follow-up patch to #157680; it allows SIMD fpcvt instructions to be generated from fptoi(_sat) nodes.
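As a rough illustration of the pattern this enables (the function name below is hypothetical, not taken from the patch): when the integer result of a conversion is immediately bitcast back to a floating-point type, the value can stay on the FPR bank and select the SIMD form of the instruction, avoiding a round trip through a GPR.

define float @cvt_f64_to_i32_fpr(double %a) {
  %i = fptosi double %a to i32   ; fptoi node targeted by this patch
  %f = bitcast i32 %i to float   ; result stays in an FP register
  ret float %f                   ; expect "fcvtzs s0, d0" with +fprcvt
}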
From 48a59b11fd76dd61a3a4b0d9f55370d37acab0bf Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 9 Sep 2025 14:18:54 +0000
Subject: [PATCH 1/6] [AArch64][GlobalISel] Add codegen for SIMD fpcvt
intrinsics
---
.../lib/Target/AArch64/AArch64InstrFormats.td | 26 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 87 ++-
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 32 +-
.../AArch64/arm64-cvt-simd-intrinsics.ll | 612 ++++++++++++++++++
4 files changed, 731 insertions(+), 26 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 8958ad129269c..74e8b98d7a47a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5299,28 +5299,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}
-multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
+multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
+ SDPatternOperator OpN = null_frag> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
- []> {
+ [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}
// half-precision to 32-bit SIMD/FPR
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
- []> {
+ [(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}
// half-precision to 64-bit SIMD/FPR
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
- []> {
+ [(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
// single-precision to 64-bit SIMD/FPR
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
- []> {
+ [(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
}
@@ -7949,6 +7950,21 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
}
}
+let mayRaiseFPException = 1, Uses = [FPCR] in
+multiclass SIMDFPTwoScalarFCVT<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpN = null_frag> {
+ let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
+ [(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
+ [(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
+ }
+ let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
+ def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+ [(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
+ }
+}
+
let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 62b26b5239365..b23f7a58ee4c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5212,19 +5212,54 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
+defm FCVTAS : SIMDFPTwoScalarFCVT< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDFPTwoScalarFCVT< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : SIMDFPTwoScalarFCVT< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDFPTwoScalarFCVT< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDFPTwoScalarFCVT< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDFPTwoScalarFCVT< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : SIMDFPTwoScalarFCVT< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDFPTwoScalarFCVT< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
+defm FCVTZS : SIMDFPTwoScalarFCVT< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalarFCVT< 1, 1, 0b11011, "fcvtzu">;
+
let Predicates = [HasNEON, HasFPRCVT] in{
- defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
- defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
- defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
- defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
- defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
- defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
- defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
- defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
+ defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>;
+ defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>;
+ defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>;
+ defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>;
+ defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>;
+ defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
+ defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
+ defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}
+multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+ def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+
+}
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+
// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
let Predicates = [HasFullFP16] in {
@@ -5301,6 +5336,32 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
+ def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # SHr) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # v1i64) $Rn)>;
+
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -6549,17 +6610,7 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
-defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index cf391c446a955..42ea80a679cb7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -568,9 +568,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu:
- // Force FPR register bank for half types, as those types otherwise
- // don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
- return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
+ return true;
default:
break;
}
@@ -1143,6 +1141,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_fcvtas:
+ case Intrinsic::aarch64_neon_fcvtau:
+ case Intrinsic::aarch64_neon_fcvtzs:
+ case Intrinsic::aarch64_neon_fcvtzu:
+ case Intrinsic::aarch64_neon_fcvtms:
+ case Intrinsic::aarch64_neon_fcvtmu:
+ case Intrinsic::aarch64_neon_fcvtns:
+ case Intrinsic::aarch64_neon_fcvtnu:
+ case Intrinsic::aarch64_neon_fcvtps:
+ case Intrinsic::aarch64_neon_fcvtpu: {
+ OpRegBankIdx[2] = PMI_FirstFPR;
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
+ }
+ TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
+ if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+ all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ prefersFPUse(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ else
+ OpRegBankIdx[0] = PMI_FirstGPR;
+ break;
+ }
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
case Intrinsic::aarch64_neon_vcvtfp2fxs:
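In short, the mapping added above forces FPR on the source operand; for the destination it prefers FPR when the source and destination sizes match or FPRCVT is available, and every non-debug use of the result is FP-friendly, otherwise it falls back to GPR. A minimal sketch of the two outcomes, assuming +fprcvt (function names are illustrative, not from the patch):

define float @fcvtzs_fpr_use(double %a) {  ; all uses are FP -> FPR bank
  %i = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %a)
  %f = bitcast i32 %i to float
  ret float %f                             ; expect "fcvtzs s0, d0"
}

define i32 @fcvtzs_gpr_use(double %a) {    ; result returned in w0 -> GPR bank
  %i = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %a)
  ret i32 %i                               ; expect "fcvtzs w0, d0"
}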
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
new file mode 100644
index 0000000000000..ae4f83a5bd261
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -0,0 +1,612 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+
+;
+; Intrinsics
+;
+
+define float @fcvtas_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtas_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtas_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtas_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtas_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtas_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtas_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtas_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtas_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtas_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtas_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtas_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+
+define float @fcvtau_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtau_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtau_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtau_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtau_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtau_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtau_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtau_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtau_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtau_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtau_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtau_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtms_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtms_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtms_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtms_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtms_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtms_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtms_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtms_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtms_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtms_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtms_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtms_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtmu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtmu_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtmu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtmu_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtmu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtmu_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtmu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtmu_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtmu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtmu_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtmu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtmu_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtns_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtns_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtns_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtns_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtns_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtns_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtns_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtns_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtns_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtns_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtns_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtns_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtnu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtnu_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtnu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtnu_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtnu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtnu_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtnu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtnu_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtnu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtnu_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtnu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtnu_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtps_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtps_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtps_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtps_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtps_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtps_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtps_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtps_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtps_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtps_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtps_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtps_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtpu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtpu_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtpu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtpu_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtpu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtpu_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtpu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtpu_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtpu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtpu_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtpu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtpu_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtzs_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtzs_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtzs_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzs_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtzs_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtzs_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtzs_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtzs_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtzs_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtzs_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtzs_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtzs_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+
+define float @fcvtzu_1s1d_simd(double %A) nounwind {
+; CHECK-LABEL: fcvtzu_1s1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %A)
+ %f = bitcast i32 %i to float
+ ret float %f
+}
+
+define double @fcvtzu_1d1s_simd(float %A) nounwind {
+; CHECK-LABEL: fcvtzu_1d1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+ %d = bitcast i64 %i to double
+ ret double %d
+}
+
+define dso_local float @fcvtzu_1s1h_simd(half %a) {
+; CHECK-LABEL: fcvtzu_1s1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+ %f = bitcast i32 %fcvt to float
+ ret float %f
+}
+
+define dso_local double @fcvtzu_1d1h_simd(half %a) {
+; CHECK-LABEL: fcvtzu_1d1h_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+ %d = bitcast i64 %vcvtah_s64_f16 to double
+ ret double %d
+}
+
+define dso_local double @fcvtzu_1d1d_simd(double %a) {
+; CHECK-LABEL: fcvtzu_1d1d_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
+ %d = bitcast i64 %vcvtah_s64_f64 to double
+ ret double %d
+}
+
+define dso_local float @fcvtzu_1s1s_simd(float %a) {
+; CHECK-LABEL: fcvtzu_1s1s_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %a)
+ %d = bitcast i32 %vcvtah_s32_f32 to float
+ ret float %d
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
From d0b733116ccff30afd3f651bb6e5fa5514281ff8 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 24 Sep 2025 08:38:19 +0000
Subject: [PATCH 2/6] Moved old converts back and fixed broken tests
---
.../lib/Target/AArch64/AArch64InstrFormats.td | 17 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 179 +++++++++---------
.../AArch64/arm64-cvt-simd-intrinsics.ll | 87 ++++-----
llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 28 +--
4 files changed, 143 insertions(+), 168 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4ae3e8590718c..f07d3514d1a99 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7941,21 +7941,10 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
}
}
-let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+let mayRaiseFPException = 1, Uses = [FPCR], FastISelShouldIgnore = 1 in
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpN = null_frag> {
let Predicates = [HasNEONandIsStreamingSafe] in {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
- }
- let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
- def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
- }
-}
-
-let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDFPTwoScalarFCVT<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpN = null_frag> {
- let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 169576058e7bd..04b3c90c2e177 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5230,17 +5230,6 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
-defm FCVTAS : SIMDFPTwoScalarFCVT< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
-defm FCVTAU : SIMDFPTwoScalarFCVT< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
-defm FCVTMS : SIMDFPTwoScalarFCVT< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
-defm FCVTMU : SIMDFPTwoScalarFCVT< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
-defm FCVTNS : SIMDFPTwoScalarFCVT< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
-defm FCVTNU : SIMDFPTwoScalarFCVT< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
-defm FCVTPS : SIMDFPTwoScalarFCVT< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
-defm FCVTPU : SIMDFPTwoScalarFCVT< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
-defm FCVTZS : SIMDFPTwoScalarFCVT< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalarFCVT< 1, 1, 0b11011, "fcvtzu">;
-
let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>;
@@ -5254,29 +5243,6 @@ let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}
-multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
- def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
- (!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
- def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
- (!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
- (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
- (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
- def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
- (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
- (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
-
-}
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
-defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
@@ -5344,61 +5310,6 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string IN
defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
- def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
- }
- def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
- def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
- def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
- def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
-
- // For global-isel we can use register classes to determine
- // which FCVT instruction to use.
- let Predicates = [HasFPRCVT] in {
- def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
- def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
- def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
- def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
- }
- def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
- def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;
-
- let Predicates = [HasFPRCVT] in {
- def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
- (!cast<Instruction>(INST # SHr) $Rn)>;
- def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
- (!cast<Instruction>(INST # DHr) $Rn)>;
- def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
- (!cast<Instruction>(INST # DSr) $Rn)>;
- def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
- (!cast<Instruction>(INST # SDr) $Rn)>;
- }
- def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
- (!cast<Instruction>(INST # v1i32) $Rn)>;
- def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
- (!cast<Instruction>(INST # v1i64) $Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
-defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
-
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
def : Pat<(i32 (to_int (round f32:$Rn))),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
@@ -6633,7 +6544,17 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6651,6 +6572,86 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
+// Floating-point conversion patterns.
+multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+ def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+
+}
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+
+multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
+ def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
+ }
+ def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
+ def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
+ def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
+ def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
+ def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # SHr) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # v1i64) $Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
+
// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index ae4f83a5bd261..d6b98458e0bed 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
;
@@ -27,7 +27,7 @@ define double @fcvtas_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtas_1s1h_simd(half %a) {
+define float @fcvtas_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtas_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas s0, h0
@@ -37,7 +37,7 @@ define dso_local float @fcvtas_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtas_1d1h_simd(half %a) {
+define double @fcvtas_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtas_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas d0, h0
@@ -47,7 +47,7 @@ define dso_local double @fcvtas_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtas_1d1d_simd(double %a) {
+define double @fcvtas_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtas_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas d0, d0
@@ -57,7 +57,7 @@ define dso_local double @fcvtas_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtas_1s1s_simd(float %a) {
+define float @fcvtas_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtas_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas s0, s0
@@ -88,7 +88,7 @@ define double @fcvtau_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtau_1s1h_simd(half %a) {
+define float @fcvtau_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtau_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau s0, h0
@@ -98,7 +98,7 @@ define dso_local float @fcvtau_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtau_1d1h_simd(half %a) {
+define double @fcvtau_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtau_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau d0, h0
@@ -108,7 +108,7 @@ define dso_local double @fcvtau_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtau_1d1d_simd(double %a) {
+define double @fcvtau_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtau_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau d0, d0
@@ -118,7 +118,7 @@ define dso_local double @fcvtau_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtau_1s1s_simd(float %a) {
+define float @fcvtau_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtau_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtau s0, s0
@@ -148,7 +148,7 @@ define double @fcvtms_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtms_1s1h_simd(half %a) {
+define float @fcvtms_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtms_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms s0, h0
@@ -158,7 +158,7 @@ define dso_local float @fcvtms_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtms_1d1h_simd(half %a) {
+define double @fcvtms_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtms_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms d0, h0
@@ -168,7 +168,7 @@ define dso_local double @fcvtms_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtms_1d1d_simd(double %a) {
+define double @fcvtms_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtms_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms d0, d0
@@ -178,7 +178,7 @@ define dso_local double @fcvtms_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtms_1s1s_simd(float %a) {
+define float @fcvtms_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtms_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms s0, s0
@@ -208,7 +208,7 @@ define double @fcvtmu_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtmu_1s1h_simd(half %a) {
+define float @fcvtmu_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtmu_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu s0, h0
@@ -218,7 +218,7 @@ define dso_local float @fcvtmu_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtmu_1d1h_simd(half %a) {
+define double @fcvtmu_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtmu_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu d0, h0
@@ -228,7 +228,7 @@ define dso_local double @fcvtmu_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtmu_1d1d_simd(double %a) {
+define double @fcvtmu_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtmu_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu d0, d0
@@ -238,7 +238,7 @@ define dso_local double @fcvtmu_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtmu_1s1s_simd(float %a) {
+define float @fcvtmu_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtmu_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtmu s0, s0
@@ -268,7 +268,7 @@ define double @fcvtns_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtns_1s1h_simd(half %a) {
+define float @fcvtns_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtns_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns s0, h0
@@ -278,7 +278,7 @@ define dso_local float @fcvtns_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtns_1d1h_simd(half %a) {
+define double @fcvtns_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtns_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns d0, h0
@@ -288,7 +288,7 @@ define dso_local double @fcvtns_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtns_1d1d_simd(double %a) {
+define double @fcvtns_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtns_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns d0, d0
@@ -298,7 +298,7 @@ define dso_local double @fcvtns_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtns_1s1s_simd(float %a) {
+define float @fcvtns_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtns_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtns s0, s0
@@ -328,7 +328,7 @@ define double @fcvtnu_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtnu_1s1h_simd(half %a) {
+define float @fcvtnu_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtnu_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu s0, h0
@@ -338,7 +338,7 @@ define dso_local float @fcvtnu_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtnu_1d1h_simd(half %a) {
+define double @fcvtnu_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtnu_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu d0, h0
@@ -348,7 +348,7 @@ define dso_local double @fcvtnu_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtnu_1d1d_simd(double %a) {
+define double @fcvtnu_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtnu_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu d0, d0
@@ -358,7 +358,7 @@ define dso_local double @fcvtnu_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtnu_1s1s_simd(float %a) {
+define float @fcvtnu_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtnu_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtnu s0, s0
@@ -388,7 +388,7 @@ define double @fcvtps_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtps_1s1h_simd(half %a) {
+define float @fcvtps_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtps_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps s0, h0
@@ -398,7 +398,7 @@ define dso_local float @fcvtps_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtps_1d1h_simd(half %a) {
+define double @fcvtps_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtps_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps d0, h0
@@ -408,7 +408,7 @@ define dso_local double @fcvtps_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtps_1d1d_simd(double %a) {
+define double @fcvtps_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtps_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps d0, d0
@@ -418,7 +418,7 @@ define dso_local double @fcvtps_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtps_1s1s_simd(float %a) {
+define float @fcvtps_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtps_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtps s0, s0
@@ -448,7 +448,7 @@ define double @fcvtpu_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtpu_1s1h_simd(half %a) {
+define float @fcvtpu_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtpu_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu s0, h0
@@ -458,7 +458,7 @@ define dso_local float @fcvtpu_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtpu_1d1h_simd(half %a) {
+define double @fcvtpu_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtpu_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu d0, h0
@@ -468,7 +468,7 @@ define dso_local double @fcvtpu_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtpu_1d1d_simd(double %a) {
+define double @fcvtpu_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtpu_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu d0, d0
@@ -478,7 +478,7 @@ define dso_local double @fcvtpu_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtpu_1s1s_simd(float %a) {
+define float @fcvtpu_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtpu_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtpu s0, s0
@@ -508,7 +508,7 @@ define double @fcvtzs_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtzs_1s1h_simd(half %a) {
+define float @fcvtzs_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtzs_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs s0, h0
@@ -518,7 +518,7 @@ define dso_local float @fcvtzs_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtzs_1d1h_simd(half %a) {
+define double @fcvtzs_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtzs_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, h0
@@ -528,7 +528,7 @@ define dso_local double @fcvtzs_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtzs_1d1d_simd(double %a) {
+define double @fcvtzs_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtzs_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs d0, d0
@@ -538,7 +538,7 @@ define dso_local double @fcvtzs_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtzs_1s1s_simd(float %a) {
+define float @fcvtzs_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtzs_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs s0, s0
@@ -568,7 +568,7 @@ define double @fcvtzu_1d1s_simd(float %A) nounwind {
ret double %d
}
-define dso_local float @fcvtzu_1s1h_simd(half %a) {
+define float @fcvtzu_1s1h_simd(half %a) {
; CHECK-LABEL: fcvtzu_1s1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu s0, h0
@@ -578,7 +578,7 @@ define dso_local float @fcvtzu_1s1h_simd(half %a) {
ret float %f
}
-define dso_local double @fcvtzu_1d1h_simd(half %a) {
+define double @fcvtzu_1d1h_simd(half %a) {
; CHECK-LABEL: fcvtzu_1d1h_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, h0
@@ -588,7 +588,7 @@ define dso_local double @fcvtzu_1d1h_simd(half %a) {
ret double %d
}
-define dso_local double @fcvtzu_1d1d_simd(double %a) {
+define double @fcvtzu_1d1d_simd(double %a) {
; CHECK-LABEL: fcvtzu_1d1d_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu d0, d0
@@ -598,7 +598,7 @@ define dso_local double @fcvtzu_1d1d_simd(double %a) {
ret double %d
}
-define dso_local float @fcvtzu_1s1s_simd(float %a) {
+define float @fcvtzu_1s1s_simd(float %a) {
; CHECK-LABEL: fcvtzu_1s1s_simd:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu s0, s0
@@ -607,6 +607,3 @@ define dso_local float @fcvtzu_1s1s_simd(float %a) {
%d = bitcast i32 %vcvtah_s32_f32 to float
ret float %d
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 60fcb643fb9f4..03fad65a5e863 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -405,16 +405,10 @@ define <2 x i64> @fcvtzs_2d_intrinsic(<2 x double> %A) nounwind {
}
define <1 x i64> @fcvtzs_1d_intrinsic(<1 x double> %A) nounwind {
-; CHECK-SD-LABEL: fcvtzs_1d_intrinsic:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcvtzs d0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcvtzs_1d_intrinsic:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcvtzs x8, d0
-; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcvtzs_1d_intrinsic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
%tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %A)
ret <1 x i64> %tmp3
}
@@ -490,16 +484,10 @@ define <2 x i64> @fcvtzu_2d_intrinsic(<2 x double> %A) nounwind {
}
define <1 x i64> @fcvtzu_1d_intrinsic(<1 x double> %A) nounwind {
-; CHECK-SD-LABEL: fcvtzu_1d_intrinsic:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcvtzu d0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcvtzu_1d_intrinsic:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcvtzu x8, d0
-; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcvtzu_1d_intrinsic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
%tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %A)
ret <1 x i64> %tmp3
}
>From 1aed880a40aba60f9c6ece977a0b8a7ef9affdc4 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 24 Sep 2025 09:06:35 +0000
Subject: [PATCH 3/6] Fix failing test
---
llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 03fad65a5e863..6496d53bf3419 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -2,15 +2,6 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for fcvtas_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtau_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtms_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtmu_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtps_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtpu_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtns_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtnu_1d
-
define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
; CHECK-LABEL: fcvtas_2s:
; CHECK: // %bb.0:
>From 44fa93a39e3ae1013c81bb736269de5e26b15b72 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 25 Sep 2025 10:03:35 +0000
Subject: [PATCH 4/6] Remove GlobalISel abort
---
llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
index d6b98458e0bed..b1b9fcf8a8b3c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
;
>From d087878358aa3dca9704880a3b0617d9631a4c5b Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 25 Sep 2025 10:05:50 +0000
Subject: [PATCH 5/6] Remove GlobalISel abort from second test
---
llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 6496d53bf3419..627d31f9a64fc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
; CHECK-LABEL: fcvtas_2s:
>From 63d8145934755b59e1c9cad525747af80322c89c Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 26 Sep 2025 08:30:13 +0000
Subject: [PATCH 6/6] Add SIMD fpcvt codegen for fptoi(_sat)
---
.../lib/Target/AArch64/AArch64InstrFormats.td | 2 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 300 ++-
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 18 +-
.../AArch64/GlobalISel/regbank-fp-use-def.mir | 2 +-
.../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 2039 +++++++++++++++++
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 57 +-
llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 30 +-
7 files changed, 2305 insertions(+), 143 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f07d3514d1a99..957d28a1ec308 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5302,7 +5302,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
- SDPatternOperator OpN = null_frag> {
+ SDPatternOperator OpN> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 04b3c90c2e177..f45816b7fcff5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5239,114 +5239,11 @@ let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
- defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
- defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
+ defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+ defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
}
-// AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-
-multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
- def : Pat<(i32 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- // These instructions saturate like fp_to_[su]int_sat.
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-}
-
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
-
-
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (any_lround f16:$Rn)),
@@ -6553,8 +6450,8 @@ defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6596,6 +6493,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
@@ -6652,6 +6551,193 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
+// AArch64's FCVT instructions saturate when out of range.
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f16:$Rn)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f16:$Rn)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f32:$Rn)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f64:$Rn)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f32:$Rn)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f64:$Rn)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
+
+multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ // These instructions saturate like fp_to_[su]int_sat.
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
+
// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
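To make the moved multiclasses concrete: FPToIntegerSatPats also folds a multiply by a power of two into the convert's #scale immediate, and FPToIntegerPats folds an explicit rounding op into the directed convert. A minimal IR sketch of both cases (not part of the patch; the expected asm comments are assumed from the SWSri and v1i32/DSr patterns above):

; Saturating fixed-point fold: the fmul by 2^16 becomes the #16 scale operand.
define i32 @sat_fixedpoint_sketch(float %x) {
  %scaled = fmul float %x, 65536.0
  %i = call i32 @llvm.fptosi.sat.i32.f32(float %scaled)
  ret i32 %i                                  ; assumed: fcvtzs w0, s0, #16
}

; Rounding fold: ceil + fptosi + bitcast collapses into one directed convert
; that writes the FPR directly (needs +fprcvt for the mixed-size form).
define double @round_fold_sketch(float %x) {
  %c = call float @llvm.ceil.f32(float %x)
  %i = fptosi float %c to i64
  %d = bitcast i64 %i to double
  ret double %d                               ; assumed: fcvtps d0, s0
}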
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index f90bcc7a77cdf..5a25b85599398 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -852,7 +852,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_FPTOSI_SAT:
- case TargetOpcode::G_FPTOUI_SAT: {
+ case TargetOpcode::G_FPTOUI_SAT:
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
if (DstType.isVector())
break;
@@ -860,11 +862,19 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
break;
}
- OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+ TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
+ if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+ all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ prefersFPUse(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ else
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}
- case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
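In effect, a scalar G_FPTOSI/G_FPTOUI now keeps its result on FPR when the source and destination sizes match (or FPRCVT supplies the mixed-size encoding) and every non-debug user of the result wants an FPR; otherwise it falls back to the old GPR-destination mapping. A small IR sketch of the two outcomes (assumed asm, mirroring the tests added below):

define double @fpr_user_sketch(float %a) {
  %i = fptosi float %a to i64
  %d = bitcast i64 %i to double  ; FP-side use -> FPR dest: fcvtzs d0, s0
  ret double %d
}

define i64 @gpr_user_sketch(float %a) {
  %i = fptosi float %a to i64    ; integer use (returned in x0) -> fcvtzs x0, s0
  ret i64 %i
}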
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index b2528840a39cf..46dbc1556fb1d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -96,7 +96,7 @@ body: |
; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]]
- ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
+ ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32)
%0:_(s32) = COPY $w0
%2:_(s32) = COPY $w1
%3:_(s32) = COPY $w2
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
new file mode 100644
index 0000000000000..4a6b1f1f1d9d2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -0,0 +1,2039 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
+
+;
+; FPTOI
+;
+
+define float @test_fptosi_f16_i32_simd(half %a) {
+; CHECK-LABEL: test_fptosi_f16_i32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %r = fptosi half %a to i32
+ %bc = bitcast i32 %r to float
+ ret float %bc
+}
+
+define double @test_fptosi_f16_i64_simd(half %a) {
+; CHECK-LABEL: test_fptosi_f16_i64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %r = fptosi half %a to i64
+ %bc = bitcast i64 %r to double
+ ret double %bc
+}
+
+define float @test_fptosi_f64_i32_simd(double %a) {
+; CHECK-LABEL: test_fptosi_f64_i32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %r = fptosi double %a to i32
+ %bc = bitcast i32 %r to float
+ ret float %bc
+}
+
+define double @test_fptosi_f32_i64_simd(float %a) {
+; CHECK-LABEL: test_fptosi_f32_i64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %r = fptosi float %a to i64
+ %bc = bitcast i64 %r to double
+ ret double %bc
+}
+
+define double @test_fptosi_f64_i64_simd(double %a) {
+; CHECK-LABEL: test_fptosi_f64_i64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %r = fptosi double %a to i64
+ %bc = bitcast i64 %r to double
+ ret double %bc
+}
+
+
+define float @test_fptosi_f32_i32_simd(float %a) {
+; CHECK-LABEL: test_fptosi_f32_i32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %r = fptosi float %a to i32
+ %bc = bitcast i32 %r to float
+ ret float %bc
+}
+
+define float @test_fptoui_f16_i32_simd(half %a) {
+; CHECK-LABEL: test_fptoui_f16_i32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %r = fptoui half %a to i32
+ %bc = bitcast i32 %r to float
+ ret float %bc
+}
+
+define double @test_fptoui_f16_i64_simd(half %a) {
+; CHECK-LABEL: test_fptoui_f16_i64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %r = fptoui half %a to i64
+ %bc = bitcast i64 %r to double
+ ret double %bc
+}
+
+define float @test_fptoui_f64_i32_simd(double %a) {
+; CHECK-LABEL: test_fptoui_f64_i32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %r = fptoui double %a to i32
+ %bc = bitcast i32 %r to float
+ ret float %bc
+}
+
+define double @test_fptoui_f32_i64_simd(float %a) {
+; CHECK-LABEL: test_fptoui_f32_i64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %r = fptoui float %a to i64
+ %bc = bitcast i64 %r to double
+ ret double %bc
+}
+
+define double @test_fptoui_f64_i64_simd(double %a) {
+; CHECK-LABEL: test_fptoui_f64_i64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %r = fptoui double %a to i64
+ %bc = bitcast i64 %r to double
+ ret double %bc
+}
+
+
+define float @test_fptoui_f32_i32_simd(float %a) {
+; CHECK-LABEL: test_fptoui_f32_i32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %r = fptoui float %a to i32
+ %bc = bitcast i32 %r to float
+ ret float %bc
+}
+
+
+;
+; FPTOI constrained (experimental strictfp intrinsics)
+;
+
+define float @fptosi_i32_f16_simd(half %x) {
+; CHECK-LABEL: fptosi_i32_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i32 %val to float
+ ret float %sum
+}
+
+define double @fptosi_i64_f16_simd(half %x) {
+; CHECK-LABEL: fptosi_i64_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i64 %val to double
+ ret double %sum
+}
+
+define double @fptosi_i64_f32_simd(float %x) {
+; CHECK-LABEL: fptosi_i64_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptosi_i32_f64_simd(double %x) {
+; CHECK-LABEL: fptosi_i32_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+define double @fptosi_i64_f64_simd(double %x) {
+; CHECK-LABEL: fptosi_i64_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptosi_i32_f32_simd(float %x) {
+; CHECK-LABEL: fptosi_i32_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+
+
+define float @fptoui_i32_f16_simd(half %x) {
+; CHECK-LABEL: fptoui_i32_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i32 %val to float
+ ret float %sum
+}
+
+define double @fptoui_i64_f16_simd(half %x) {
+; CHECK-LABEL: fptoui_i64_f16_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+ %sum = bitcast i64 %val to double
+ ret double %sum
+}
+
+define double @fptoui_i64_f32_simd(float %x) {
+; CHECK-LABEL: fptoui_i64_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptoui_i32_f64_simd(double %x) {
+; CHECK-LABEL: fptoui_i32_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+define double @fptoui_i64_f64_simd(double %x) {
+; CHECK-LABEL: fptoui_i64_f64_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
+ %bc = bitcast i64 %val to double
+ ret double %bc
+}
+
+define float @fptoui_i32_f32_simd(float %x) {
+; CHECK-LABEL: fptoui_i32_f32_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
+ %bc = bitcast i32 %val to float
+ ret float %bc
+}
+
+;
+; FPTOI rounding
+;
+
+
+define double @fcvtas_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = fptosi float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtas_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = fptosi double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtas_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = fptosi float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtas_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = fptosi double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+define double @fcvtau_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = fptoui float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtau_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = fptoui double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtau_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = fptoui float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtau_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = fptoui double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+define double @fcvtms_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = fptosi float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtms_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = fptosi double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtms_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = fptosi float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtms_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = fptosi double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+
+define double @fcvtmu_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = fptoui float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtmu_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = fptoui double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtmu_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = fptoui float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtmu_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = fptoui double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+define double @fcvtps_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = fptosi float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtps_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = fptosi double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtps_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = fptosi float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtps_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = fptosi double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+define double @fcvtpu_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = fptoui float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtpu_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = fptoui double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtpu_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = fptoui float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtpu_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = fptoui double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+define double @fcvtzs_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = fptosi float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzs_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = fptosi double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzs_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = fptosi float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzs_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = fptosi double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtzu_ds_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ds_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_ds_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = fptoui float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzu_sd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_sd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_sd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = fptoui double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzu_ss_round_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ss_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_ss_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = fptoui float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzu_dd_round_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_dd_round_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_dd_round_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = fptoui double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+;
+; FPTOI saturating
+;
+
+define float @fcvtzs_sh_sat_simd(half %a) {
+; CHECK-LABEL: fcvtzs_sh_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, h0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzs_dh_sat_simd(half %a) {
+; CHECK-LABEL: fcvtzs_dh_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, h0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtzs_ds_sat_simd(float %a) {
+; CHECK-LABEL: fcvtzs_ds_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzs_sd_sat_simd(double %a) {
+; CHECK-LABEL: fcvtzs_sd_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzs_ss_sat_simd(float %a) {
+; CHECK-LABEL: fcvtzs_ss_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzs_dd_sat_simd(double %a) {
+; CHECK-LABEL: fcvtzs_dd_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzu_sh_sat_simd(half %a) {
+; CHECK-LABEL: fcvtzu_sh_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, h0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzu_dh_sat_simd(half %a) {
+; CHECK-LABEL: fcvtzu_dh_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, h0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtzu_ds_sat_simd(float %a) {
+; CHECK-LABEL: fcvtzu_ds_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, s0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzu_sd_sat_simd(double %a) {
+; CHECK-LABEL: fcvtzu_sd_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, d0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzu_ss_sat_simd(float %a) {
+; CHECK-LABEL: fcvtzu_ss_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ret
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzu_dd_sat_simd(double %a) {
+; CHECK-LABEL: fcvtzu_dd_sat_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: ret
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+;
+; FPTOI saturating with rounding
+;
+
+define float @fcvtas_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtas_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frinta h0, h0
+; CHECK-GI-NEXT: fcvtzs s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.round.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtas_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtas_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frinta h0, h0
+; CHECK-GI-NEXT: fcvtzs d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.round.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtas_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtas_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtas_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtas_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtas_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtas_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtas d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtas_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtau_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtau_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frinta h0, h0
+; CHECK-GI-NEXT: fcvtzu s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.round.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtau_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtau_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frinta h0, h0
+; CHECK-GI-NEXT: fcvtzu d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.round.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtau_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtau_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtau_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtau_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl roundf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @roundf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtau_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtau_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtau d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtau_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl round
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @round(double %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtms_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtms_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintm h0, h0
+; CHECK-GI-NEXT: fcvtzs s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.floor.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtms_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtms_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintm h0, h0
+; CHECK-GI-NEXT: fcvtzs d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.floor.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtms_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtms_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtms_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtms_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtms_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtms_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtms d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtms_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtmu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtmu_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintm h0, h0
+; CHECK-GI-NEXT: fcvtzu s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.floor.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtmu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtmu_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintm h0, h0
+; CHECK-GI-NEXT: fcvtzu d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.floor.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtmu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtmu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtmu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtmu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtps_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtps_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintp h0, h0
+; CHECK-GI-NEXT: fcvtzs s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtps_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtps_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintp h0, h0
+; CHECK-GI-NEXT: fcvtzs d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtps_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtps_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtps_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtps_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtpu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtpu_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintp h0, h0
+; CHECK-GI-NEXT: fcvtzu s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtpu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtpu_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintp h0, h0
+; CHECK-GI-NEXT: fcvtzu d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtpu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtpu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtpu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtpu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzs_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintz h0, h0
+; CHECK-GI-NEXT: fcvtzs s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzs_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintz h0, h0
+; CHECK-GI-NEXT: fcvtzs d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtzs_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzs_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzs_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzs s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzs_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzu_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintz h0, h0
+; CHECK-GI-NEXT: fcvtzu s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzu_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintz h0, h0
+; CHECK-GI-NEXT: fcvtzu d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtzu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzu_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzu_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+declare half @llvm.floor.f16(half) nounwind readnone
+declare half @llvm.ceil.f16(half) nounwind readnone
+declare half @llvm.trunc.f16(half) nounwind readnone
+declare half @llvm.round.f16(half) nounwind readnone
+declare float @floorf(float) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare float @truncf(float) nounwind readnone
+declare float @roundf(float) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare double @trunc(double) nounwind readnone
+declare double @round(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index e18a5f695ba29..d8f370884c84a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -980,12 +980,18 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
}
define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-LABEL: test_bitcastv8i8tov1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: neg v0.8b, v0.8b
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_bitcastv8i8tov1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: neg v0.8b, v0.8b
+; CHECK-SD-NEXT: fcvtzs x8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_bitcastv8i8tov1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: neg v0.8b, v0.8b
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ret
%sub.i = sub <8 x i8> zeroinitializer, %a
%1 = bitcast <8 x i8> %sub.i to <1 x double>
%vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -993,12 +999,18 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
}
define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-LABEL: test_bitcastv4i16tov1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: neg v0.4h, v0.4h
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_bitcastv4i16tov1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: neg v0.4h, v0.4h
+; CHECK-SD-NEXT: fcvtzs x8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_bitcastv4i16tov1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: neg v0.4h, v0.4h
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ret
%sub.i = sub <4 x i16> zeroinitializer, %a
%1 = bitcast <4 x i16> %sub.i to <1 x double>
%vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1006,12 +1018,18 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
}
define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-LABEL: test_bitcastv2i32tov1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: neg v0.2s, v0.2s
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_bitcastv2i32tov1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: neg v0.2s, v0.2s
+; CHECK-SD-NEXT: fcvtzs x8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_bitcastv2i32tov1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: neg v0.2s, v0.2s
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ret
%sub.i = sub <2 x i32> zeroinitializer, %a
%1 = bitcast <2 x i32> %sub.i to <1 x double>
%vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1031,8 +1049,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: neg x8, x8
; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: fcvtzs x8, d0
-; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: fcvtzs d0, d0
; CHECK-GI-NEXT: ret
%sub.i = sub <1 x i64> zeroinitializer, %a
%1 = bitcast <1 x i64> %sub.i to <1 x double>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 627d31f9a64fc..1e0cfa0201263 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -359,11 +359,16 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
; FIXME: Generate "fcvtzs d0, d0"?
define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
-; CHECK-LABEL: fcvtzs_1d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcvtzs_1d:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs x8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_1d:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ret
%tmp3 = fptosi <1 x double> %A to <1 x i64>
ret <1 x i64> %tmp3
}
@@ -438,11 +443,16 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
; FIXME: Generate "fcvtzu d0, d0"?
define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind {
-; CHECK-LABEL: fcvtzu_1d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcvtzu_1d:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzu x8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzu_1d:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ret
%tmp3 = fptoui <1 x double> %A to <1 x i64>
ret <1 x i64> %tmp3
}