[llvm] [AArch64][SVE] Avoid transfer to GPRs for fp -> int -> fp conversions (PR #112564)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 05:44:06 PDT 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/112564
From 564d9a78713e1f95dbef6a14441b82d77c25f04d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 16 Oct 2024 14:45:43 +0000
Subject: [PATCH 1/3] [AArch64][SVE] Avoid transfer to GPRs for fp -> int -> fp
conversions
When Neon is not available, use the SVE variants of FCVTZS, FCVTZU, UCVTF,
and SCVTF for fp -> int -> fp conversions, to avoid moving values to/from
GPRs, which may be expensive.

Note: With +sme2p2, the single-element vector Neon variants of these
instructions could be used instead (but that feature is not implemented
yet).

Follow-up to #112213.
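
For illustration, a minimal sketch of the round-trip pattern this targets,
in the shape of the updated t1 test in sve-streaming-mode-cvt-fp-int-fp.ll
(the IR value names below are illustrative; the before/after instructions
are copied from that test's check lines):

  define double @t1(double %x) {
  entry:
    %i = fptosi double %x to i64    ; previously selected: fcvtzs x8, d0
    %f = sitofp i64 %i to double    ; previously selected: scvtf  d0, x8
    ret double %f
  }

  ; With +sve and -force-streaming-compatible this now keeps the value in
  ; vector registers (register-liveness comments elided):
  ;   ptrue  p0.d, vl1
  ;   fcvtzs z0.d, p0/m, z0.d
  ;   scvtf  z0.d, p0/m, z0.d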
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 35 ++++++++
.../sve-streaming-mode-cvt-fp-int-fp.ll | 89 +++++++++++++++----
2 files changed, 107 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2a857234c7d745..19dc2016f9fcf7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2421,6 +2421,41 @@ let Predicates = [HasSVEorSME] in {
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
} // End HasSVEorSME
+// Helper for creating fp -> int -> fp conversions using SVE.
+class sve_fp_int_fp_cvt<Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
+ : OutPatFrag<(ops node: $Rn),
+ (EXTRACT_SUBREG
+ (FROM_INT (IMPLICIT_DEF), (PTRUE 1),
+ (TO_INT (IMPLICIT_DEF), (PTRUE 1),
+ (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>;
+
+// Some float -> int -> float conversion patterns where we want to keep the int
+// values in FP registers using the SVE instructions to avoid costly GPR <-> FPR
+// register transfers. Only used when NEON is not available (e.g. in streaming
+// functions).
+// TODO: When +sme2p2 is available single-element vectors should be preferred.
+def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">;
+let Predicates = [HasSVEorSME, HasNoNEON] in {
+def : Pat<
+ (f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
+ (sve_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
+def : Pat<
+ (f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
+ (sve_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
+def : Pat<
+ (f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
+ (sve_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
+def : Pat<
+ (f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
+ (sve_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
+def : Pat<
+ (f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
+ (sve_fp_int_fp_cvt<PTRUE_H, SCVTF_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoH, hsub> $Rn)>;
+def : Pat<
+ (f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
+ (sve_fp_int_fp_cvt<PTRUE_H, UCVTF_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoH, hsub> $Rn)>;
+} // End HasSVEorSME, HasNoNEON
+
let Predicates = [HasBF16, HasSVEorSME] in {
defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 9aadf3133ba197..fbbe2cc64ad248 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
target triple = "aarch64-unknown-linux-gnu"
@@ -7,10 +8,19 @@ target triple = "aarch64-unknown-linux-gnu"
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NONEON-NOSVE-LABEL: t1:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x8, d0
+; NONEON-NOSVE-NEXT: scvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
+;
; NON-STREAMING-LABEL: t1:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs d0, d0
@@ -25,10 +35,19 @@ entry:
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
+; NONEON-NOSVE-LABEL: t2:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w8, s0
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
+;
; NON-STREAMING-LABEL: t2:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs s0, s0
@@ -43,12 +62,21 @@ entry:
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
-; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ptrue p0.h, vl1
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
+; NONEON-NOSVE-LABEL: t3:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs w8, s0
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+;
; NON-STREAMING-LABEL: t3:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
@@ -65,10 +93,19 @@ entry:
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NONEON-NOSVE-LABEL: t4:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x8, d0
+; NONEON-NOSVE-NEXT: ucvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
+;
; NON-STREAMING-LABEL: t4:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu d0, d0
@@ -83,10 +120,19 @@ entry:
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
+; NONEON-NOSVE-LABEL: t5:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w8, s0
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
+;
; NON-STREAMING-LABEL: t5:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu s0, s0
@@ -101,12 +147,21 @@ entry:
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
-; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ptrue p0.h, vl1
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
+; NONEON-NOSVE-LABEL: t6:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu w8, s0
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+;
; NON-STREAMING-LABEL: t6:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
From 974c5aecdb544b0cdfa48ea262edf4d430c52317 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 16 Oct 2024 15:22:57 +0000
Subject: [PATCH 2/3] Note that this is for scalars
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 27 ++++++++++---------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 19dc2016f9fcf7..078ea43a76f31d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2421,39 +2421,40 @@ let Predicates = [HasSVEorSME] in {
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
} // End HasSVEorSME
-// Helper for creating fp -> int -> fp conversions using SVE.
-class sve_fp_int_fp_cvt<Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
+// Helper for creating scalar fp -> int -> fp conversions using SVE.
+class sve_scalar_fp_int_fp_cvt
+ <Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
: OutPatFrag<(ops node: $Rn),
(EXTRACT_SUBREG
(FROM_INT (IMPLICIT_DEF), (PTRUE 1),
(TO_INT (IMPLICIT_DEF), (PTRUE 1),
(INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>;
-// Some float -> int -> float conversion patterns where we want to keep the int
-// values in FP registers using the SVE instructions to avoid costly GPR <-> FPR
-// register transfers. Only used when NEON is not available (e.g. in streaming
-// functions).
-// TODO: When +sme2p2 is available single-element vectors should be preferred.
+// Some scalar float -> int -> float conversion patterns where we want to keep
+// the int values in FP registers to avoid costly GPR <-> FPR register
+// transfers using SVE instructions. Only used when NEON is not available (e.g.
+// in streaming functions).
+// TODO: When +sme2p2 is available Neon single-element vectors should be preferred.
def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">;
let Predicates = [HasSVEorSME, HasNoNEON] in {
def : Pat<
(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
- (sve_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
+ (sve_scalar_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
def : Pat<
(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
- (sve_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
+ (sve_scalar_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
def : Pat<
(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
- (sve_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
+ (sve_scalar_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
def : Pat<
(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
- (sve_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
+ (sve_scalar_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
def : Pat<
(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
- (sve_fp_int_fp_cvt<PTRUE_H, SCVTF_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoH, hsub> $Rn)>;
+ (sve_scalar_fp_int_fp_cvt<PTRUE_H, SCVTF_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoH, hsub> $Rn)>;
def : Pat<
(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
- (sve_fp_int_fp_cvt<PTRUE_H, UCVTF_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoH, hsub> $Rn)>;
+ (sve_scalar_fp_int_fp_cvt<PTRUE_H, UCVTF_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoH, hsub> $Rn)>;
} // End HasSVEorSME, HasNoNEON
let Predicates = [HasBF16, HasSVEorSME] in {
From aea6409978af044f0f0f89fa85c7e8a6296e285a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 21 Oct 2024 21:11:44 +0000
Subject: [PATCH 3/3] Lower scalar FP converts to SVE
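
This replaces the TableGen-only patterns from the first commit with an
ISelLowering-based lowering that also covers individual scalar fp -> int
and int -> fp conversions (including the strict variants). For
illustration, one of the added tests from
sve-streaming-mode-cvt-fp-to-int.ll:

  define i64 @f32_to_s64(float %x) {
  entry:
    %cvt = fptosi float %x to i64
    ret i64 %cvt
  }

  ; With SVE available (streaming-compatible or streaming) this now selects
  ; (register-liveness comments elided):
  ;   ptrue  p0.d
  ;   fcvtzs z0.d, p0/m, z0.s
  ;   fmov   x0, d0
  ; while the NONEON-NOSVE lines keep the single fcvtzs x0, s0.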
---
.../Target/AArch64/AArch64ISelLowering.cpp | 98 ++++-
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 40 +-
.../sve-streaming-mode-cvt-fp-int-fp.ll | 20 +-
.../sve-streaming-mode-cvt-fp-to-int.ll | 264 +++++++++++++
.../sve-streaming-mode-cvt-int-to-fp.ll | 265 +++++++++++++
...e-streaming-mode-fixed-length-fp-to-int.ll | 366 ++++++++----------
...e-streaming-mode-fixed-length-int-to-fp.ll | 121 ++++--
7 files changed, 880 insertions(+), 294 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 927c057adc00df..22fa812b129aca 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1454,8 +1454,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
@@ -2138,6 +2142,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FP_ROUND, VT, Default);
setOperationAction(ISD::FP_TO_SINT, VT, Default);
setOperationAction(ISD::FP_TO_UINT, VT, Default);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Default);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Default);
setOperationAction(ISD::FRINT, VT, Default);
setOperationAction(ISD::LRINT, VT, Default);
setOperationAction(ISD::LLRINT, VT, Default);
@@ -2164,6 +2170,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::SIGN_EXTEND, VT, Default);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Default);
setOperationAction(ISD::SINT_TO_FP, VT, Default);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Default);
setOperationAction(ISD::SMAX, VT, Default);
setOperationAction(ISD::SMIN, VT, Default);
setOperationAction(ISD::SPLAT_VECTOR, VT, Default);
@@ -2174,6 +2181,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::TRUNCATE, VT, Default);
setOperationAction(ISD::UDIV, VT, Default);
setOperationAction(ISD::UINT_TO_FP, VT, Default);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Default);
setOperationAction(ISD::UMAX, VT, Default);
setOperationAction(ISD::UMIN, VT, Default);
setOperationAction(ISD::VECREDUCE_ADD, VT, Default);
@@ -4550,9 +4558,10 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
- unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
- ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
- : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
+ unsigned Opc = Op.getOpcode();
+ bool IsSigned = Opc == ISD::FP_TO_SINT || Opc == ISD::STRICT_FP_TO_SINT;
+ unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
+ : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
@@ -4628,6 +4637,51 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
return Op;
}
+static bool CanLowerToScalarSVEFPIntConversion(EVT VT) {
+ if (!VT.isSimple())
+ return false;
+ // There are SVE instructions that can convert to/from all pairs of these int
+ // and float types. Note: We don't bother with i8 or i16 as those are illegal
+ // types for scalars.
+ return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
+ VT.getSimpleVT().SimpleTy);
+}
+
+/// Lowers a scalar FP conversion (to/from) int to SVE.
+static SDValue LowerScalarFPConversionToSVE(SDValue Op, SelectionDAG &DAG) {
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+ EVT SrcTy = SrcVal.getValueType();
+ EVT DestTy = Op.getValueType();
+ EVT SrcVecTy;
+ EVT DestVecTy;
+ // Use a packed vector for the larger type.
+ // Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
+ // notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
+ // (unlike floats) nxv2i32 is an illegal unpacked type.
+ if (DestTy.bitsGT(SrcTy)) {
+ DestVecTy = getPackedSVEVectorVT(DestTy);
+ SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
+ : DestVecTy.changeVectorElementType(SrcTy);
+ } else {
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
+ DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
+ : SrcVecTy.changeVectorElementType(DestTy);
+ }
+ SDLoc dl(Op);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
+ Vec = IsStrict ? DAG.getNode(Op.getOpcode(), dl, {DestVecTy, MVT::Other},
+ {Op.getOperand(0), Vec})
+ : DAG.getNode(Op.getOpcode(), dl, DestVecTy, Vec);
+ SDValue Scalar =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, ZeroIdx);
+ if (IsStrict)
+ return DAG.getMergeValues({Scalar, Vec.getValue(1)}, dl);
+ return Scalar;
+}
+
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@@ -4636,6 +4690,12 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ if (!Subtarget->isNeonAvailable() &&
+ Subtarget->isSVEorStreamingSVEAvailable() &&
+ CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
+ CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
+ return LowerScalarFPConversionToSVE(Op, DAG);
+
// f16 conversions are promoted to f32 when full fp16 is not supported.
if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
SrcVal.getValueType() == MVT::bf16) {
@@ -4939,6 +4999,12 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+ if (!Subtarget->isNeonAvailable() &&
+ Subtarget->isSVEorStreamingSVEAvailable() &&
+ CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
+ CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
+ return LowerScalarFPConversionToSVE(Op, DAG);
+
bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op->getOpcode() == ISD::SINT_TO_FP;
@@ -28293,7 +28359,21 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
unsigned NewOp) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
- auto Pg = getPredicateForVector(DAG, DL, VT);
+ SDValue Pg;
+
+ // FCVTZS_ZPmZ_DtoS and FCVTZU_ZPmZ_DtoS are special cases. These operations
+ // return nxv4i32 rather than the correct nxv2i32, as nxv2i32 is an illegal
+ // unpacked type. So, in this case, we take the predicate size from the
+ // operand.
+ SDValue LastOp{};
+ if ((NewOp == AArch64ISD::FCVTZU_MERGE_PASSTHRU ||
+ NewOp == AArch64ISD::FCVTZS_MERGE_PASSTHRU) &&
+ VT == MVT::nxv4i32 &&
+ (LastOp = Op->ops().back().get()).getValueType() == MVT::nxv2f64) {
+ Pg = getPredicateForVector(DAG, DL, LastOp.getValueType());
+ } else {
+ Pg = getPredicateForVector(DAG, DL, VT);
+ }
if (VT.isFixedLengthVector()) {
assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
@@ -28329,7 +28409,12 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
SmallVector<SDValue, 4> Operands = {Pg};
+ SDValue Chain{};
for (const SDValue &V : Op->op_values()) {
+ if (!isa<CondCodeSDNode>(V) && V.getValueType() == MVT::Other) {
+ Chain = V;
+ continue;
+ }
assert((!V.getValueType().isVector() ||
V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
@@ -28339,7 +28424,10 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
- return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
+ auto NewNode = DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
+ if (Chain)
+ return DAG.getMergeValues({NewNode, Chain}, DL);
+ return NewNode;
}
// If a fixed length vector operation has no side effects when applied to
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 078ea43a76f31d..c7d0f852a8a517 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2338,8 +2338,8 @@ let Predicates = [HasSVEorSME] in {
defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, AArch64fcvtzs_mt, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, AArch64fcvtzu_mt, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
@@ -2421,42 +2421,6 @@ let Predicates = [HasSVEorSME] in {
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
} // End HasSVEorSME
-// Helper for creating scalar fp -> int -> fp conversions using SVE.
-class sve_scalar_fp_int_fp_cvt
- <Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
- : OutPatFrag<(ops node: $Rn),
- (EXTRACT_SUBREG
- (FROM_INT (IMPLICIT_DEF), (PTRUE 1),
- (TO_INT (IMPLICIT_DEF), (PTRUE 1),
- (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>;
-
-// Some scalar float -> int -> float conversion patterns where we want to keep
-// the int values in FP registers to avoid costly GPR <-> FPR register
-// transfers using SVE instructions. Only used when NEON is not available (e.g.
-// in streaming functions).
-// TODO: When +sme2p2 is available Neon single-element vectors should be preferred.
-def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">;
-let Predicates = [HasSVEorSME, HasNoNEON] in {
-def : Pat<
- (f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
- (sve_scalar_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
-def : Pat<
- (f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
- (sve_scalar_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
-def : Pat<
- (f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
- (sve_scalar_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
-def : Pat<
- (f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
- (sve_scalar_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
-def : Pat<
- (f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
- (sve_scalar_fp_int_fp_cvt<PTRUE_H, SCVTF_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoH, hsub> $Rn)>;
-def : Pat<
- (f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
- (sve_scalar_fp_int_fp_cvt<PTRUE_H, UCVTF_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoH, hsub> $Rn)>;
-} // End HasSVEorSME, HasNoNEON
-
let Predicates = [HasBF16, HasSVEorSME] in {
defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index fbbe2cc64ad248..6b5ede00254c58 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
@@ -35,7 +35,7 @@ entry:
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
@@ -62,10 +62,10 @@ entry:
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h, vl1
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
@@ -93,7 +93,7 @@ entry:
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
@@ -120,7 +120,7 @@ entry:
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
@@ -147,10 +147,10 @@ entry:
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h, vl1
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
-; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
new file mode 100644
index 00000000000000..60d3124f5b21e8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @f16_to_s32(half %x) {
+; CHECK-LABEL: f16_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi half %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f16_to_s64(half %x) {
+; CHECK-LABEL: f16_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi half %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f32_to_s32(float %x) {
+; CHECK-LABEL: f32_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi float %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f32_to_s64(float %x) {
+; CHECK-LABEL: f32_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi float %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f64_to_s32(double %x) {
+; CHECK-LABEL: f64_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi double %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f64_to_s64(double %x) {
+; CHECK-LABEL: f64_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi double %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f16_to_u32(half %x) {
+; CHECK-LABEL: f16_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui half %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f16_to_u64(half %x) {
+; CHECK-LABEL: f16_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui half %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f32_to_u32(float %x) {
+; CHECK-LABEL: f32_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui float %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f32_to_u64(float %x) {
+; CHECK-LABEL: f32_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui float %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f64_to_u32(double %x) {
+; CHECK-LABEL: f64_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.d
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui double %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f64_to_u64(double %x) {
+; CHECK-LABEL: f64_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui double %x to i64
+ ret i64 %cvt
+}
+
+define i32 @strict_convert_signed(double %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0
+ ret i32 %cvt
+}
+
+define i32 @strict_convert_unsigned(float %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
+ ret i32 %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
new file mode 100644
index 00000000000000..42be60ad559705
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
@@ -0,0 +1,265 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define half @s32_to_f16(i32 %x) {
+; CHECK-LABEL: s32_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, w0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i32 %x to half
+ ret half %cvt
+}
+
+define float @s32_to_f32(i32 %x) {
+; CHECK-LABEL: s32_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i32 %x to float
+ ret float %cvt
+}
+
+define double @s32_to_f64(i32 %x) {
+; CHECK-LABEL: s32_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf d0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i32 %x to double
+ ret double %cvt
+}
+
+define half @u32_to_f16(i32 %x) {
+; CHECK-LABEL: u32_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, w0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i32 %x to half
+ ret half %cvt
+}
+
+define float @u32_to_f32(i32 %x) {
+; CHECK-LABEL: u32_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i32 %x to float
+ ret float %cvt
+}
+
+define double @u32_to_f64(i32 %x) {
+; CHECK-LABEL: u32_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf d0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i32 %x to double
+ ret double %cvt
+}
+
+define half @s64_to_f16(i64 %x) {
+; CHECK-LABEL: s64_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, x0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i64 %x to half
+ ret half %cvt
+}
+
+define float @s64_to_f32(i64 %x) {
+; CHECK-LABEL: s64_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i64 %x to float
+ ret float %cvt
+}
+
+define double @s64_to_f64(i64 %x) {
+; CHECK-LABEL: s64_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf d0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i64 %x to double
+ ret double %cvt
+}
+
+define half @u64_to_f16(i64 %x) {
+; CHECK-LABEL: u64_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, x0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i64 %x to half
+ ret half %cvt
+}
+
+define float @u64_to_f32(i64 %x) {
+; CHECK-LABEL: u64_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i64 %x to float
+ ret float %cvt
+}
+
+define double @u64_to_f64(i64 %x) {
+; CHECK-LABEL: u64_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf d0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i64 %x to double
+ ret double %cvt
+}
+
+define half @strict_convert_signed(i32 %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, w0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %cvt
+}
+
+define float @strict_convert_unsigned(i64 %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 11fee267660c03..5e162fbfef196b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -418,8 +418,10 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) {
define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
; CHECK-LABEL: fcvtzu_v1f16_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu x8, h0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64:
@@ -441,10 +443,9 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.h, z0.h[1]
-; CHECK-NEXT: fcvtzu x8, h0
-; CHECK-NEXT: fcvtzu x9, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -472,20 +473,17 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.h, z0.h[3]
; CHECK-NEXT: mov z2.h, z0.h[2]
; CHECK-NEXT: mov z3.h, z0.h[1]
-; CHECK-NEXT: fcvtzu x10, h0
-; CHECK-NEXT: fcvtzu x8, h1
-; CHECK-NEXT: fcvtzu x9, h2
-; CHECK-NEXT: fcvtzu x11, h3
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
; CHECK-NEXT: zip1 z1.d, z2.d, z1.d
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64:
@@ -522,36 +520,29 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzu_v8f16_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.h, z0.h[3]
; CHECK-NEXT: mov z3.h, z0.h[2]
; CHECK-NEXT: mov z4.h, z0.h[1]
-; CHECK-NEXT: fcvtzu x10, h0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: fcvtzu x8, h2
-; CHECK-NEXT: fcvtzu x9, h3
-; CHECK-NEXT: fcvtzu x11, h4
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.h
; CHECK-NEXT: mov z5.h, z1.h[3]
; CHECK-NEXT: mov z6.h, z1.h[2]
-; CHECK-NEXT: mov z2.h, z1.h[1]
-; CHECK-NEXT: fcvtzu x14, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzu x12, h5
-; CHECK-NEXT: fcvtzu x13, h6
-; CHECK-NEXT: fcvtzu x15, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d4, x13
-; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
-; CHECK-NEXT: fmov d3, x14
-; CHECK-NEXT: zip1 z1.d, z4.d, z1.d
-; CHECK-NEXT: fmov d4, x15
-; CHECK-NEXT: stp q2, q0, [x1]
-; CHECK-NEXT: zip1 z3.d, z3.d, z4.d
-; CHECK-NEXT: stp q3, q1, [x1, #32]
+; CHECK-NEXT: mov z7.h, z1.h[1]
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
+; CHECK-NEXT: zip1 z0.d, z0.d, z4.d
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.h
+; CHECK-NEXT: stp q0, q2, [x1]
+; CHECK-NEXT: zip1 z3.d, z6.d, z5.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z7.d
+; CHECK-NEXT: stp q1, q3, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64:
@@ -604,67 +595,54 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z5.d, z1.d
-; CHECK-NEXT: mov z2.h, z0.h[3]
-; CHECK-NEXT: mov z4.h, z1.h[1]
-; CHECK-NEXT: mov z6.h, z1.h[3]
-; CHECK-NEXT: fcvtzu x9, h1
-; CHECK-NEXT: fcvtzu x8, h0
-; CHECK-NEXT: mov z7.h, z0.h[1]
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8
-; CHECK-NEXT: fcvtzu x10, h2
-; CHECK-NEXT: fcvtzu x11, h4
-; CHECK-NEXT: fcvtzu x12, h6
-; CHECK-NEXT: mov z1.h, z1.h[2]
-; CHECK-NEXT: mov z0.h, z0.h[2]
-; CHECK-NEXT: fmov d16, x9
-; CHECK-NEXT: mov z2.h, z3.h[3]
-; CHECK-NEXT: mov z4.h, z5.h[3]
-; CHECK-NEXT: fcvtzu x14, h3
-; CHECK-NEXT: fcvtzu x13, h1
-; CHECK-NEXT: fcvtzu x15, h5
-; CHECK-NEXT: mov z1.h, z3.h[1]
-; CHECK-NEXT: mov z6.h, z5.h[1]
-; CHECK-NEXT: mov z5.h, z5.h[2]
-; CHECK-NEXT: mov z3.h, z3.h[2]
-; CHECK-NEXT: fcvtzu x9, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fcvtzu x10, h4
-; CHECK-NEXT: fmov d4, x11
-; CHECK-NEXT: fcvtzu x11, h7
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzu x12, h0
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fcvtzu x13, h1
-; CHECK-NEXT: fmov d1, x14
-; CHECK-NEXT: fcvtzu x14, h6
-; CHECK-NEXT: fmov d6, x15
-; CHECK-NEXT: fcvtzu x15, h5
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: fcvtzu x9, h3
-; CHECK-NEXT: zip1 z4.d, z16.d, z4.d
-; CHECK-NEXT: fmov d16, x8
-; CHECK-NEXT: zip1 z0.d, z0.d, z7.d
-; CHECK-NEXT: fmov d3, x12
-; CHECK-NEXT: fmov d7, x10
-; CHECK-NEXT: stp q4, q0, [x1, #64]
-; CHECK-NEXT: fmov d0, x14
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: zip1 z0.d, z6.d, z0.d
-; CHECK-NEXT: zip1 z4.d, z4.d, z5.d
-; CHECK-NEXT: zip1 z3.d, z16.d, z3.d
-; CHECK-NEXT: fmov d16, x15
-; CHECK-NEXT: stp q3, q2, [x1]
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: zip1 z7.d, z16.d, z7.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z2.d
-; CHECK-NEXT: stp q0, q7, [x1, #96]
-; CHECK-NEXT: stp q1, q4, [x1, #32]
+; CHECK-NEXT: ldp q1, q0, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.h, z1.h[1]
+; CHECK-NEXT: mov z5.h, z0.h[3]
+; CHECK-NEXT: mov z6.h, z0.h[2]
+; CHECK-NEXT: mov z16.d, z0.d
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z1.h
+; CHECK-NEXT: mov z4.h, z1.h[3]
+; CHECK-NEXT: mov z7.h, z1.h[2]
+; CHECK-NEXT: mov z17.h, z0.h[1]
+; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.h
+; CHECK-NEXT: fcvtzu z17.d, p0/m, z17.h
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.h
+; CHECK-NEXT: mov z20.h, z1.h[3]
+; CHECK-NEXT: mov z18.h, z16.h[3]
+; CHECK-NEXT: mov z19.h, z16.h[2]
+; CHECK-NEXT: mov z21.h, z16.h[1]
+; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
+; CHECK-NEXT: mov z3.h, z1.h[2]
+; CHECK-NEXT: zip1 z5.d, z6.d, z5.d
+; CHECK-NEXT: mov z6.h, z1.h[1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z17.d
+; CHECK-NEXT: fcvtzu z16.d, p0/m, z16.h
+; CHECK-NEXT: fcvtzu z18.d, p0/m, z18.h
+; CHECK-NEXT: movprfx z17, z21
+; CHECK-NEXT: fcvtzu z17.d, p0/m, z21.h
+; CHECK-NEXT: fcvtzu z19.d, p0/m, z19.h
+; CHECK-NEXT: zip1 z4.d, z7.d, z4.d
+; CHECK-NEXT: movprfx z7, z20
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z20.h
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT: stp q0, q5, [x1, #64]
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT: zip1 z0.d, z19.d, z18.d
+; CHECK-NEXT: zip1 z5.d, z16.d, z17.d
+; CHECK-NEXT: stp q2, q4, [x1]
+; CHECK-NEXT: zip1 z2.d, z3.d, z7.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z6.d
+; CHECK-NEXT: stp q5, q0, [x1, #96]
+; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64:
@@ -1186,7 +1164,10 @@ define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) {
define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2135,8 +2116,10 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) {
define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
; CHECK-LABEL: fcvtzs_v1f16_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64:
@@ -2159,10 +2142,9 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.h, z0.h[1]
-; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: fcvtzs x9, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -2190,20 +2172,17 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzs_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.h, z0.h[3]
; CHECK-NEXT: mov z2.h, z0.h[2]
; CHECK-NEXT: mov z3.h, z0.h[1]
-; CHECK-NEXT: fcvtzs x10, h0
-; CHECK-NEXT: fcvtzs x8, h1
-; CHECK-NEXT: fcvtzs x9, h2
-; CHECK-NEXT: fcvtzs x11, h3
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
; CHECK-NEXT: zip1 z1.d, z2.d, z1.d
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64:
@@ -2240,36 +2219,29 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzs_v8f16_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.h, z0.h[3]
; CHECK-NEXT: mov z3.h, z0.h[2]
; CHECK-NEXT: mov z4.h, z0.h[1]
-; CHECK-NEXT: fcvtzs x10, h0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: fcvtzs x8, h2
-; CHECK-NEXT: fcvtzs x9, h3
-; CHECK-NEXT: fcvtzs x11, h4
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
; CHECK-NEXT: mov z5.h, z1.h[3]
; CHECK-NEXT: mov z6.h, z1.h[2]
-; CHECK-NEXT: mov z2.h, z1.h[1]
-; CHECK-NEXT: fcvtzs x14, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzs x12, h5
-; CHECK-NEXT: fcvtzs x13, h6
-; CHECK-NEXT: fcvtzs x15, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d4, x13
-; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
-; CHECK-NEXT: fmov d3, x14
-; CHECK-NEXT: zip1 z1.d, z4.d, z1.d
-; CHECK-NEXT: fmov d4, x15
-; CHECK-NEXT: stp q2, q0, [x1]
-; CHECK-NEXT: zip1 z3.d, z3.d, z4.d
-; CHECK-NEXT: stp q3, q1, [x1, #32]
+; CHECK-NEXT: mov z7.h, z1.h[1]
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
+; CHECK-NEXT: zip1 z0.d, z0.d, z4.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT: stp q0, q2, [x1]
+; CHECK-NEXT: zip1 z3.d, z6.d, z5.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z7.d
+; CHECK-NEXT: stp q1, q3, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64:
@@ -2322,67 +2294,54 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z5.d, z1.d
-; CHECK-NEXT: mov z2.h, z0.h[3]
-; CHECK-NEXT: mov z4.h, z1.h[1]
-; CHECK-NEXT: mov z6.h, z1.h[3]
-; CHECK-NEXT: fcvtzs x9, h1
-; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: mov z7.h, z0.h[1]
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8
-; CHECK-NEXT: fcvtzs x10, h2
-; CHECK-NEXT: fcvtzs x11, h4
-; CHECK-NEXT: fcvtzs x12, h6
-; CHECK-NEXT: mov z1.h, z1.h[2]
-; CHECK-NEXT: mov z0.h, z0.h[2]
-; CHECK-NEXT: fmov d16, x9
-; CHECK-NEXT: mov z2.h, z3.h[3]
-; CHECK-NEXT: mov z4.h, z5.h[3]
-; CHECK-NEXT: fcvtzs x14, h3
-; CHECK-NEXT: fcvtzs x13, h1
-; CHECK-NEXT: fcvtzs x15, h5
-; CHECK-NEXT: mov z1.h, z3.h[1]
-; CHECK-NEXT: mov z6.h, z5.h[1]
-; CHECK-NEXT: mov z5.h, z5.h[2]
-; CHECK-NEXT: mov z3.h, z3.h[2]
-; CHECK-NEXT: fcvtzs x9, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fcvtzs x10, h4
-; CHECK-NEXT: fmov d4, x11
-; CHECK-NEXT: fcvtzs x11, h7
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, h0
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fcvtzs x13, h1
-; CHECK-NEXT: fmov d1, x14
-; CHECK-NEXT: fcvtzs x14, h6
-; CHECK-NEXT: fmov d6, x15
-; CHECK-NEXT: fcvtzs x15, h5
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: fcvtzs x9, h3
-; CHECK-NEXT: zip1 z4.d, z16.d, z4.d
-; CHECK-NEXT: fmov d16, x8
-; CHECK-NEXT: zip1 z0.d, z0.d, z7.d
-; CHECK-NEXT: fmov d3, x12
-; CHECK-NEXT: fmov d7, x10
-; CHECK-NEXT: stp q4, q0, [x1, #64]
-; CHECK-NEXT: fmov d0, x14
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: zip1 z0.d, z6.d, z0.d
-; CHECK-NEXT: zip1 z4.d, z4.d, z5.d
-; CHECK-NEXT: zip1 z3.d, z16.d, z3.d
-; CHECK-NEXT: fmov d16, x15
-; CHECK-NEXT: stp q3, q2, [x1]
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: zip1 z7.d, z16.d, z7.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z2.d
-; CHECK-NEXT: stp q0, q7, [x1, #96]
-; CHECK-NEXT: stp q1, q4, [x1, #32]
+; CHECK-NEXT: ldp q1, q0, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.h, z1.h[1]
+; CHECK-NEXT: mov z5.h, z0.h[3]
+; CHECK-NEXT: mov z6.h, z0.h[2]
+; CHECK-NEXT: mov z16.d, z0.d
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h
+; CHECK-NEXT: mov z4.h, z1.h[3]
+; CHECK-NEXT: mov z7.h, z1.h[2]
+; CHECK-NEXT: mov z17.h, z0.h[1]
+; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z17.h
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT: mov z20.h, z1.h[3]
+; CHECK-NEXT: mov z18.h, z16.h[3]
+; CHECK-NEXT: mov z19.h, z16.h[2]
+; CHECK-NEXT: mov z21.h, z16.h[1]
+; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
+; CHECK-NEXT: mov z3.h, z1.h[2]
+; CHECK-NEXT: zip1 z5.d, z6.d, z5.d
+; CHECK-NEXT: mov z6.h, z1.h[1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z17.d
+; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.h
+; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.h
+; CHECK-NEXT: movprfx z17, z21
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z21.h
+; CHECK-NEXT: fcvtzs z19.d, p0/m, z19.h
+; CHECK-NEXT: zip1 z4.d, z7.d, z4.d
+; CHECK-NEXT: movprfx z7, z20
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z20.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: stp q0, q5, [x1, #64]
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT: zip1 z0.d, z19.d, z18.d
+; CHECK-NEXT: zip1 z5.d, z16.d, z17.d
+; CHECK-NEXT: stp q2, q4, [x1]
+; CHECK-NEXT: zip1 z2.d, z3.d, z7.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z6.d
+; CHECK-NEXT: stp q5, q0, [x1, #96]
+; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64:
@@ -2906,7 +2865,10 @@ define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) {
define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index e595686cb4975d..24ad0f502dbf33 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -390,8 +390,11 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_v1i16_v1f64:
@@ -1142,10 +1145,9 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, z0.d[1]
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: ucvtf h0, x8
-; CHECK-NEXT: ucvtf h1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2596,10 +2598,9 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, z0.d[1]
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: scvtf h0, x8
-; CHECK-NEXT: scvtf h1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: scvtf z1.h, p0/m, z1.d
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2795,7 +2796,10 @@ define half @scvtf_i16_f16(ptr %0) {
; CHECK-LABEL: scvtf_i16_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i16_f16:
@@ -2813,7 +2817,10 @@ define float @scvtf_i16_f32(ptr %0) {
; CHECK-LABEL: scvtf_i16_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i16_f32:
@@ -2830,7 +2837,10 @@ define double @scvtf_i16_f64(ptr %0) {
; CHECK-LABEL: scvtf_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i16_f64:
@@ -2846,8 +2856,10 @@ define double @scvtf_i16_f64(ptr %0) {
define half @scvtf_i32_f16(ptr %0) {
; CHECK-LABEL: scvtf_i32_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: scvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i32_f16:
@@ -2864,8 +2876,10 @@ define half @scvtf_i32_f16(ptr %0) {
define float @scvtf_i32_f32(ptr %0) {
; CHECK-LABEL: scvtf_i32_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i32_f32:
@@ -2881,8 +2895,10 @@ define float @scvtf_i32_f32(ptr %0) {
define double @scvtf_i32_f64(ptr %0) {
; CHECK-LABEL: scvtf_i32_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i32_f64:
@@ -2898,8 +2914,10 @@ define double @scvtf_i32_f64(ptr %0) {
define half @scvtf_i64_f16(ptr %0) {
; CHECK-LABEL: scvtf_i64_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: scvtf h0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i64_f16:
@@ -2916,8 +2934,10 @@ define half @scvtf_i64_f16(ptr %0) {
define float @scvtf_i64_f32(ptr %0) {
; CHECK-LABEL: scvtf_i64_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i64_f32:
@@ -2933,8 +2953,10 @@ define float @scvtf_i64_f32(ptr %0) {
define double @scvtf_i64_f64(ptr %0) {
; CHECK-LABEL: scvtf_i64_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i64_f64:
@@ -2951,7 +2973,10 @@ define half @ucvtf_i16_f16(ptr %0) {
; CHECK-LABEL: ucvtf_i16_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i16_f16:
@@ -2969,7 +2994,10 @@ define float @ucvtf_i16_f32(ptr %0) {
; CHECK-LABEL: ucvtf_i16_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i16_f32:
@@ -2986,7 +3014,10 @@ define double @ucvtf_i16_f64(ptr %0) {
; CHECK-LABEL: ucvtf_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i16_f64:
@@ -3002,8 +3033,10 @@ define double @ucvtf_i16_f64(ptr %0) {
define half @ucvtf_i32_f16(ptr %0) {
; CHECK-LABEL: ucvtf_i32_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i32_f16:
@@ -3020,8 +3053,10 @@ define half @ucvtf_i32_f16(ptr %0) {
define float @ucvtf_i32_f32(ptr %0) {
; CHECK-LABEL: ucvtf_i32_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i32_f32:
@@ -3037,8 +3072,10 @@ define float @ucvtf_i32_f32(ptr %0) {
define double @ucvtf_i32_f64(ptr %0) {
; CHECK-LABEL: ucvtf_i32_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i32_f64:
@@ -3054,8 +3091,10 @@ define double @ucvtf_i32_f64(ptr %0) {
define half @ucvtf_i64_f16(ptr %0) {
; CHECK-LABEL: ucvtf_i64_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ucvtf h0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i64_f16:
@@ -3072,8 +3111,10 @@ define half @ucvtf_i64_f16(ptr %0) {
define float @ucvtf_i64_f32(ptr %0) {
; CHECK-LABEL: ucvtf_i64_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i64_f32:
@@ -3089,8 +3130,10 @@ define float @ucvtf_i64_f32(ptr %0) {
define double @ucvtf_i64_f64(ptr %0) {
; CHECK-LABEL: ucvtf_i64_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i64_f64: