[llvm] [AArch64][SVE] Use SVE for scalar FP converts in streaming[-compatible] functions (1/n) (PR #118505)

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 07:53:46 PST 2024


https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/118505

In streaming[-compatible] functions, use SVE for scalar FP conversions to/from integer types. This can help avoid moves between FPRs and GRPs, which could be costly.

This patch also updates definitions of SCVTF_ZPmZ_StoD and UCVTF_ZPmZ_StoD to disallow lowering to them from ISD nodes, as doing so requires creating a [U|S]INT_TO_FP_MERGE_PASSTHRU node with inconsistent types.

Follow up to #112213.

Note: This PR does not include support for f64 <-> i32 conversions (like #112564), which needs a bit more work to support.

>From a9374dd5f75f35fc20e7f68dfbe7784980e7618f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 3 Dec 2024 15:47:10 +0000
Subject: [PATCH] [AArch64][SVE] Use SVE for scalar FP converts in
 streaming[-compatible] functions (1/n)

In streaming[-compatible] functions, use SVE for scalar FP conversions
to/from integer types. This can help avoid moves between FPRs and GRPs,
which could be costly.

This patch also updates definitions of SCVTF_ZPmZ_StoD and
UCVTF_ZPmZ_StoD to disallow lowering to them from ISD nodes, as doing
so requires creating a [U|S]INT_TO_FP_MERGE_PASSTHRU node with
inconsistent types.

Follow up to #112213.

Note: This PR does not include support for f64 <-> i32 conversions
(like #112564), which needs a bit more work to support.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  65 ++++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   4 +-
 .../sve-streaming-mode-cvt-fp-int-fp.ll       |  94 ++++-
 .../sve-streaming-mode-cvt-fp-to-int.ll       | 252 +++++++++++++
 .../sve-streaming-mode-cvt-int-to-fp.ll       | 252 +++++++++++++
 ...e-streaming-mode-fixed-length-fp-to-int.ll | 356 ++++++++----------
 ...e-streaming-mode-fixed-length-int-to-fp.ll |  94 +++--
 7 files changed, 864 insertions(+), 253 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e1be825fcf7bf3..1edc39ce375937 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18989,6 +18989,65 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
   return SDValue();
 }
 
+/// Tries to replace scalar FP <-> INT conversions with SVE in streaming
+/// functions, this can help to reduce the number of fmovs to/from GPRs.
+static SDValue
+tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
+                                      const AArch64Subtarget *Subtarget) {
+  if (N->isStrictFPOpcode())
+    return SDValue();
+
+  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
+      (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
+    return SDValue();
+
+  auto isSupportedType = [](EVT VT) {
+    if (!VT.isSimple())
+      return false;
+    // There are SVE instructions that can convert to/from all pairs of these
+    // int and float types. Note: We don't bother with i8 or i16 as those are
+    // illegal types for scalars.
+    return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
+                        VT.getSimpleVT().SimpleTy);
+  };
+
+  if (!isSupportedType(N->getValueType(0)) ||
+      !isSupportedType(N->getOperand(0).getValueType()))
+    return SDValue();
+
+  SDValue SrcVal = N->getOperand(0);
+  EVT SrcTy = SrcVal.getValueType();
+  EVT DestTy = N->getValueType(0);
+
+  bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
+  bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
+
+  // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
+  // type (unlike the equivalent nxv2f32 for floating-point types).
+  // TODO: Support these conversations.
+  if (IsI32ToF64 || isF64ToI32)
+    return SDValue();
+
+  EVT SrcVecTy;
+  EVT DestVecTy;
+  if (DestTy.bitsGT(SrcTy)) {
+    DestVecTy = getPackedSVEVectorVT(DestTy);
+    SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
+                                 : DestVecTy.changeVectorElementType(SrcTy);
+  } else {
+    SrcVecTy = getPackedSVEVectorVT(SrcTy);
+    DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
+                                   : SrcVecTy.changeVectorElementType(DestTy);
+  }
+
+  SDLoc DL(N);
+  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
+  SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
+                            DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
+  SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
+}
+
 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
                                      const AArch64Subtarget *Subtarget) {
   // First try to optimize away the conversion when it's conditionally from
@@ -18996,6 +19055,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
   if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
     return Res;
 
+  if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+    return Res;
+
   EVT VT = N->getValueType(0);
   if (VT != MVT::f32 && VT != MVT::f64)
     return SDValue();
@@ -19034,6 +19096,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
 static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const AArch64Subtarget *Subtarget) {
+  if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+    return Res;
+
   if (!Subtarget->isNeonAvailable())
     return SDValue();
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a15e89be1a24b2..178ea149971b8f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2348,8 +2348,8 @@ let Predicates = [HasSVEorSME] in {
   defm FCVT_ZPmZ_HtoD   : sve_fp_2op_p_zd< 0b1101001, "fcvt",   ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16,   AArch64fcvte_mt,  nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
   defm FCVT_ZPmZ_DtoS   : sve_fp_2op_p_zdr<0b1101010, "fcvt",   ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64,   AArch64fcvtr_mt,  nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
   defm FCVT_ZPmZ_StoD   : sve_fp_2op_p_zd< 0b1101011, "fcvt",   ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32,   AArch64fcvte_mt,  nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
-  defm SCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd< 0b1110000, "scvtf",  ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32,  AArch64scvtf_mt,  nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
-  defm UCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd< 0b1110001, "ucvtf",  ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32,  AArch64ucvtf_mt,  nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+  defm SCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd< 0b1110000, "scvtf",  ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32,  null_frag,        nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+  defm UCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd< 0b1110001, "ucvtf",  ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32,  null_frag,        nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
   defm UCVTF_ZPmZ_StoH  : sve_fp_2op_p_zd< 0b0110101, "ucvtf",  ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32,  AArch64ucvtf_mt,  nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
   defm SCVTF_ZPmZ_DtoS  : sve_fp_2op_p_zd< 0b1110100, "scvtf",  ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64,  AArch64scvtf_mt,  nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
   defm SCVTF_ZPmZ_StoH  : sve_fp_2op_p_zd< 0b0110100, "scvtf",  ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32,  AArch64scvtf_mt,  nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index f402463de7be81..1050dc0210a67e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,15 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible -mattr=+sme2p2  < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
-; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -mattr=+neon < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
 define double @t1(double %x) {
 ; CHECK-LABEL: t1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    scvtf d0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t1:
@@ -17,6 +22,12 @@ define double @t1(double %x) {
 ; USE-NEON-NO-GPRS-NEXT:    fcvtzs d0, d0
 ; USE-NEON-NO-GPRS-NEXT:    scvtf d0, d0
 ; USE-NEON-NO-GPRS-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: t1:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs x8, d0
+; NONEON-NOSVE-NEXT:    scvtf d0, x8
+; NONEON-NOSVE-NEXT:    ret
 entry:
   %conv = fptosi double %x to i64
   %conv1 = sitofp i64 %conv to double
@@ -26,8 +37,11 @@ entry:
 define float @t2(float %x) {
 ; CHECK-LABEL: t2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    scvtf s0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t2:
@@ -35,6 +49,12 @@ define float @t2(float %x) {
 ; USE-NEON-NO-GPRS-NEXT:    fcvtzs s0, s0
 ; USE-NEON-NO-GPRS-NEXT:    scvtf s0, s0
 ; USE-NEON-NO-GPRS-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: t2:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs w8, s0
+; NONEON-NOSVE-NEXT:    scvtf s0, w8
+; NONEON-NOSVE-NEXT:    ret
 entry:
   %conv = fptosi float %x to i32
   %conv1 = sitofp i32 %conv to float
@@ -44,10 +64,11 @@ entry:
 define half @t3(half %x)  {
 ; CHECK-LABEL: t3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvt s0, h0
-; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    scvtf s0, w8
-; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t3:
@@ -57,6 +78,14 @@ define half @t3(half %x)  {
 ; USE-NEON-NO-GPRS-NEXT:    scvtf s0, s0
 ; USE-NEON-NO-GPRS-NEXT:    fcvt h0, s0
 ; USE-NEON-NO-GPRS-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: t3:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvtzs w8, s0
+; NONEON-NOSVE-NEXT:    scvtf s0, w8
+; NONEON-NOSVE-NEXT:    fcvt h0, s0
+; NONEON-NOSVE-NEXT:    ret
 entry:
   %conv = fptosi half %x to i32
   %conv1 = sitofp i32 %conv to half
@@ -66,8 +95,11 @@ entry:
 define double @t4(double %x) {
 ; CHECK-LABEL: t4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t4:
@@ -75,6 +107,12 @@ define double @t4(double %x) {
 ; USE-NEON-NO-GPRS-NEXT:    fcvtzu d0, d0
 ; USE-NEON-NO-GPRS-NEXT:    ucvtf d0, d0
 ; USE-NEON-NO-GPRS-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: t4:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu x8, d0
+; NONEON-NOSVE-NEXT:    ucvtf d0, x8
+; NONEON-NOSVE-NEXT:    ret
 entry:
   %conv = fptoui double %x to i64
   %conv1 = uitofp i64 %conv to double
@@ -84,8 +122,11 @@ entry:
 define float @t5(float %x) {
 ; CHECK-LABEL: t5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu w8, s0
-; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t5:
@@ -93,6 +134,12 @@ define float @t5(float %x) {
 ; USE-NEON-NO-GPRS-NEXT:    fcvtzu s0, s0
 ; USE-NEON-NO-GPRS-NEXT:    ucvtf s0, s0
 ; USE-NEON-NO-GPRS-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: t5:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu w8, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ret
 entry:
   %conv = fptoui float %x to i32
   %conv1 = uitofp i32 %conv to float
@@ -102,10 +149,11 @@ entry:
 define half @t6(half %x)  {
 ; CHECK-LABEL: t6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvt s0, h0
-; CHECK-NEXT:    fcvtzu w8, s0
-; CHECK-NEXT:    ucvtf s0, w8
-; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t6:
@@ -115,6 +163,14 @@ define half @t6(half %x)  {
 ; USE-NEON-NO-GPRS-NEXT:    ucvtf s0, s0
 ; USE-NEON-NO-GPRS-NEXT:    fcvt h0, s0
 ; USE-NEON-NO-GPRS-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: t6:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvtzu w8, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    fcvt h0, s0
+; NONEON-NOSVE-NEXT:    ret
 entry:
   %conv = fptoui half %x to i32
   %conv1 = uitofp i32 %conv to half
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
new file mode 100644
index 00000000000000..3ae0089d409d0e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @f16_to_s32(half %x) {
+; CHECK-LABEL: f16_to_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvtzs w0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptosi half %x to i32
+  ret i32 %cvt
+}
+
+define i64 @f16_to_s64(half %x) {
+; CHECK-LABEL: f16_to_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvtzs x0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptosi half %x to i64
+  ret i64 %cvt
+}
+
+define i32 @f32_to_s32(float %x) {
+; CHECK-LABEL: f32_to_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs w0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptosi float %x to i32
+  ret i32 %cvt
+}
+
+define i64 @f32_to_s64(float %x) {
+; CHECK-LABEL: f32_to_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs x0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptosi float %x to i64
+  ret i64 %cvt
+}
+
+define i32 @f64_to_s32(double %x) {
+; CHECK-LABEL: f64_to_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs w0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs w0, d0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptosi double %x to i32
+  ret i32 %cvt
+}
+
+define i64 @f64_to_s64(double %x) {
+; CHECK-LABEL: f64_to_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs x0, d0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptosi double %x to i64
+  ret i64 %cvt
+}
+
+define i32 @f16_to_u32(half %x) {
+; CHECK-LABEL: f16_to_u32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvtzu w0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptoui half %x to i32
+  ret i32 %cvt
+}
+
+define i64 @f16_to_u64(half %x) {
+; CHECK-LABEL: f16_to_u64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvtzu x0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptoui half %x to i64
+  ret i64 %cvt
+}
+
+define i32 @f32_to_u32(float %x) {
+; CHECK-LABEL: f32_to_u32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu w0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptoui float %x to i32
+  ret i32 %cvt
+}
+
+define i64 @f32_to_u64(float %x) {
+; CHECK-LABEL: f32_to_u64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu x0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptoui float %x to i64
+  ret i64 %cvt
+}
+
+define i32 @f64_to_u32(double %x) {
+; CHECK-LABEL: f64_to_u32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu w0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu w0, d0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptoui double %x to i32
+  ret i32 %cvt
+}
+
+define i64 @f64_to_u64(double %x) {
+; CHECK-LABEL: f64_to_u64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu x0, d0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = fptoui double %x to i64
+  ret i64 %cvt
+}
+
+define i32 @strict_convert_signed(double %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs w0, d0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzs w0, d0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0
+  ret i32 %cvt
+}
+
+define i32 @strict_convert_unsigned(float %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu w0, s0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    fcvtzu w0, s0
+; NONEON-NOSVE-NEXT:    ret
+  entry:
+  %cvt = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
+  ret i32 %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
new file mode 100644
index 00000000000000..f30d2d578fdeb7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define half @s32_to_f16(i32 %x) {
+; CHECK-LABEL: s32_to_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f16:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf s0, w0
+; NONEON-NOSVE-NEXT:    fcvt h0, s0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = sitofp i32 %x to half
+  ret half %cvt
+}
+
+define float @s32_to_f32(i32 %x) {
+; CHECK-LABEL: s32_to_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf s0, w0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = sitofp i32 %x to float
+  ret float %cvt
+}
+
+define double @s32_to_f64(i32 %x) {
+; CHECK-LABEL: s32_to_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, w0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf d0, w0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = sitofp i32 %x to double
+  ret double %cvt
+}
+
+define half @u32_to_f16(i32 %x) {
+; CHECK-LABEL: u32_to_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f16:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf s0, w0
+; NONEON-NOSVE-NEXT:    fcvt h0, s0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = uitofp i32 %x to half
+  ret half %cvt
+}
+
+define float @u32_to_f32(i32 %x) {
+; CHECK-LABEL: u32_to_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf s0, w0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = uitofp i32 %x to float
+  ret float %cvt
+}
+
+define double @u32_to_f64(i32 %x) {
+; CHECK-LABEL: u32_to_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf d0, w0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf d0, w0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = uitofp i32 %x to double
+  ret double %cvt
+}
+
+define half @s64_to_f16(i64 %x) {
+; CHECK-LABEL: s64_to_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f16:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf s0, x0
+; NONEON-NOSVE-NEXT:    fcvt h0, s0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = sitofp i64 %x to half
+  ret half %cvt
+}
+
+define float @s64_to_f32(i64 %x) {
+; CHECK-LABEL: s64_to_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf s0, x0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = sitofp i64 %x to float
+  ret float %cvt
+}
+
+define double @s64_to_f64(i64 %x) {
+; CHECK-LABEL: s64_to_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf d0, x0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = sitofp i64 %x to double
+  ret double %cvt
+}
+
+define half @u64_to_f16(i64 %x) {
+; CHECK-LABEL: u64_to_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f16:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf s0, x0
+; NONEON-NOSVE-NEXT:    fcvt h0, s0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = uitofp i64 %x to half
+  ret half %cvt
+}
+
+define float @u64_to_f32(i64 %x) {
+; CHECK-LABEL: u64_to_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f32:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf s0, x0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = uitofp i64 %x to float
+  ret float %cvt
+}
+
+define double @u64_to_f64(i64 %x) {
+; CHECK-LABEL: u64_to_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f64:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf d0, x0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = uitofp i64 %x to double
+  ret double %cvt
+}
+
+define float @strict_convert_signed(i32 %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    scvtf s0, w0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %cvt
+}
+
+define float @strict_convert_unsigned(i64 %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ucvtf s0, x0
+; CHECK-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE:       // %bb.0: // %entry
+; NONEON-NOSVE-NEXT:    ucvtf s0, x0
+; NONEON-NOSVE-NEXT:    ret
+entry:
+  %cvt = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 11fee267660c03..b61c30af379944 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -418,8 +418,10 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) {
 define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f16_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x8, h0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64:
@@ -441,10 +443,9 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    fcvtzu x8, h0
-; CHECK-NEXT:    fcvtzu x9, h1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.h
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -472,20 +473,17 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzu_v4f16_v4i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.h, z0.h[3]
 ; CHECK-NEXT:    mov z2.h, z0.h[2]
 ; CHECK-NEXT:    mov z3.h, z0.h[1]
-; CHECK-NEXT:    fcvtzu x10, h0
-; CHECK-NEXT:    fcvtzu x8, h1
-; CHECK-NEXT:    fcvtzu x9, h2
-; CHECK-NEXT:    fcvtzu x11, h3
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
-; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    fmov d1, x11
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT:    fcvtzu z2.d, p0/m, z2.h
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z3.h
 ; CHECK-NEXT:    zip1 z1.d, z2.d, z1.d
-; CHECK-NEXT:    stp q1, q0, [x1]
+; CHECK-NEXT:    zip1 z0.d, z0.d, z3.d
+; CHECK-NEXT:    stp q0, q1, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64:
@@ -522,36 +520,29 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzu_v8f16_v8i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    mov z2.h, z0.h[3]
 ; CHECK-NEXT:    mov z3.h, z0.h[2]
 ; CHECK-NEXT:    mov z4.h, z0.h[1]
-; CHECK-NEXT:    fcvtzu x10, h0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    fcvtzu x8, h2
-; CHECK-NEXT:    fcvtzu x9, h3
-; CHECK-NEXT:    fcvtzu x11, h4
+; CHECK-NEXT:    fcvtzu z2.d, p0/m, z2.h
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzu z4.d, p0/m, z4.h
 ; CHECK-NEXT:    mov z5.h, z1.h[3]
 ; CHECK-NEXT:    mov z6.h, z1.h[2]
-; CHECK-NEXT:    mov z2.h, z1.h[1]
-; CHECK-NEXT:    fcvtzu x14, h1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    fcvtzu x12, h5
-; CHECK-NEXT:    fcvtzu x13, h6
-; CHECK-NEXT:    fcvtzu x15, h2
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    fmov d1, x12
-; CHECK-NEXT:    fmov d4, x13
-; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
-; CHECK-NEXT:    fmov d3, x14
-; CHECK-NEXT:    zip1 z1.d, z4.d, z1.d
-; CHECK-NEXT:    fmov d4, x15
-; CHECK-NEXT:    stp q2, q0, [x1]
-; CHECK-NEXT:    zip1 z3.d, z3.d, z4.d
-; CHECK-NEXT:    stp q3, q1, [x1, #32]
+; CHECK-NEXT:    mov z7.h, z1.h[1]
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT:    zip1 z2.d, z3.d, z2.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z4.d
+; CHECK-NEXT:    fcvtzu z5.d, p0/m, z5.h
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT:    fcvtzu z7.d, p0/m, z7.h
+; CHECK-NEXT:    stp q0, q2, [x1]
+; CHECK-NEXT:    zip1 z3.d, z6.d, z5.d
+; CHECK-NEXT:    zip1 z1.d, z1.d, z7.d
+; CHECK-NEXT:    stp q1, q3, [x1, #32]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64:
@@ -604,67 +595,54 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
 define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzu_v16f16_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    mov z3.d, z0.d
-; CHECK-NEXT:    mov z5.d, z1.d
-; CHECK-NEXT:    mov z2.h, z0.h[3]
-; CHECK-NEXT:    mov z4.h, z1.h[1]
-; CHECK-NEXT:    mov z6.h, z1.h[3]
-; CHECK-NEXT:    fcvtzu x9, h1
-; CHECK-NEXT:    fcvtzu x8, h0
-; CHECK-NEXT:    mov z7.h, z0.h[1]
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z5.b, z5.b, z1.b, #8
-; CHECK-NEXT:    fcvtzu x10, h2
-; CHECK-NEXT:    fcvtzu x11, h4
-; CHECK-NEXT:    fcvtzu x12, h6
-; CHECK-NEXT:    mov z1.h, z1.h[2]
-; CHECK-NEXT:    mov z0.h, z0.h[2]
-; CHECK-NEXT:    fmov d16, x9
-; CHECK-NEXT:    mov z2.h, z3.h[3]
-; CHECK-NEXT:    mov z4.h, z5.h[3]
-; CHECK-NEXT:    fcvtzu x14, h3
-; CHECK-NEXT:    fcvtzu x13, h1
-; CHECK-NEXT:    fcvtzu x15, h5
-; CHECK-NEXT:    mov z1.h, z3.h[1]
-; CHECK-NEXT:    mov z6.h, z5.h[1]
-; CHECK-NEXT:    mov z5.h, z5.h[2]
-; CHECK-NEXT:    mov z3.h, z3.h[2]
-; CHECK-NEXT:    fcvtzu x9, h2
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fcvtzu x10, h4
-; CHECK-NEXT:    fmov d4, x11
-; CHECK-NEXT:    fcvtzu x11, h7
-; CHECK-NEXT:    fmov d7, x12
-; CHECK-NEXT:    fcvtzu x12, h0
-; CHECK-NEXT:    fmov d0, x13
-; CHECK-NEXT:    fcvtzu x13, h1
-; CHECK-NEXT:    fmov d1, x14
-; CHECK-NEXT:    fcvtzu x14, h6
-; CHECK-NEXT:    fmov d6, x15
-; CHECK-NEXT:    fcvtzu x15, h5
-; CHECK-NEXT:    fmov d5, x9
-; CHECK-NEXT:    fcvtzu x9, h3
-; CHECK-NEXT:    zip1 z4.d, z16.d, z4.d
-; CHECK-NEXT:    fmov d16, x8
-; CHECK-NEXT:    zip1 z0.d, z0.d, z7.d
-; CHECK-NEXT:    fmov d3, x12
-; CHECK-NEXT:    fmov d7, x10
-; CHECK-NEXT:    stp q4, q0, [x1, #64]
-; CHECK-NEXT:    fmov d0, x14
-; CHECK-NEXT:    fmov d4, x9
-; CHECK-NEXT:    zip1 z2.d, z3.d, z2.d
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    zip1 z0.d, z6.d, z0.d
-; CHECK-NEXT:    zip1 z4.d, z4.d, z5.d
-; CHECK-NEXT:    zip1 z3.d, z16.d, z3.d
-; CHECK-NEXT:    fmov d16, x15
-; CHECK-NEXT:    stp q3, q2, [x1]
-; CHECK-NEXT:    fmov d2, x13
-; CHECK-NEXT:    zip1 z7.d, z16.d, z7.d
-; CHECK-NEXT:    zip1 z1.d, z1.d, z2.d
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    stp q1, q4, [x1, #32]
+; CHECK-NEXT:    ldp q1, q0, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z3.h, z1.h[1]
+; CHECK-NEXT:    mov z5.h, z0.h[3]
+; CHECK-NEXT:    mov z6.h, z0.h[2]
+; CHECK-NEXT:    mov z16.d, z0.d
+; CHECK-NEXT:    movprfx z2, z1
+; CHECK-NEXT:    fcvtzu z2.d, p0/m, z1.h
+; CHECK-NEXT:    mov z4.h, z1.h[3]
+; CHECK-NEXT:    mov z7.h, z1.h[2]
+; CHECK-NEXT:    mov z17.h, z0.h[1]
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT:    fcvtzu z5.d, p0/m, z5.h
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzu z4.d, p0/m, z4.h
+; CHECK-NEXT:    fcvtzu z17.d, p0/m, z17.h
+; CHECK-NEXT:    fcvtzu z7.d, p0/m, z7.h
+; CHECK-NEXT:    mov z20.h, z1.h[3]
+; CHECK-NEXT:    mov z18.h, z16.h[3]
+; CHECK-NEXT:    mov z19.h, z16.h[2]
+; CHECK-NEXT:    mov z21.h, z16.h[1]
+; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
+; CHECK-NEXT:    mov z3.h, z1.h[2]
+; CHECK-NEXT:    zip1 z5.d, z6.d, z5.d
+; CHECK-NEXT:    mov z6.h, z1.h[1]
+; CHECK-NEXT:    zip1 z0.d, z0.d, z17.d
+; CHECK-NEXT:    fcvtzu z16.d, p0/m, z16.h
+; CHECK-NEXT:    fcvtzu z18.d, p0/m, z18.h
+; CHECK-NEXT:    movprfx z17, z21
+; CHECK-NEXT:    fcvtzu z17.d, p0/m, z21.h
+; CHECK-NEXT:    fcvtzu z19.d, p0/m, z19.h
+; CHECK-NEXT:    zip1 z4.d, z7.d, z4.d
+; CHECK-NEXT:    movprfx z7, z20
+; CHECK-NEXT:    fcvtzu z7.d, p0/m, z20.h
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT:    stp q0, q5, [x1, #64]
+; CHECK-NEXT:    fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT:    zip1 z0.d, z19.d, z18.d
+; CHECK-NEXT:    zip1 z5.d, z16.d, z17.d
+; CHECK-NEXT:    stp q2, q4, [x1]
+; CHECK-NEXT:    zip1 z2.d, z3.d, z7.d
+; CHECK-NEXT:    zip1 z1.d, z1.d, z6.d
+; CHECK-NEXT:    stp q5, q0, [x1, #96]
+; CHECK-NEXT:    stp q1, q2, [x1, #32]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64:
@@ -2135,8 +2113,10 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) {
 define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f16_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, h0
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64:
@@ -2159,10 +2139,9 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    fcvtzs x8, h0
-; CHECK-NEXT:    fcvtzs x9, h1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.h
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -2190,20 +2169,17 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v4f16_v4i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.h, z0.h[3]
 ; CHECK-NEXT:    mov z2.h, z0.h[2]
 ; CHECK-NEXT:    mov z3.h, z0.h[1]
-; CHECK-NEXT:    fcvtzs x10, h0
-; CHECK-NEXT:    fcvtzs x8, h1
-; CHECK-NEXT:    fcvtzs x9, h2
-; CHECK-NEXT:    fcvtzs x11, h3
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
-; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    fmov d1, x11
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.h
 ; CHECK-NEXT:    zip1 z1.d, z2.d, z1.d
-; CHECK-NEXT:    stp q1, q0, [x1]
+; CHECK-NEXT:    zip1 z0.d, z0.d, z3.d
+; CHECK-NEXT:    stp q0, q1, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64:
@@ -2240,36 +2216,29 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v8f16_v8i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    mov z2.h, z0.h[3]
 ; CHECK-NEXT:    mov z3.h, z0.h[2]
 ; CHECK-NEXT:    mov z4.h, z0.h[1]
-; CHECK-NEXT:    fcvtzs x10, h0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    fcvtzs x8, h2
-; CHECK-NEXT:    fcvtzs x9, h3
-; CHECK-NEXT:    fcvtzs x11, h4
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z4.h
 ; CHECK-NEXT:    mov z5.h, z1.h[3]
 ; CHECK-NEXT:    mov z6.h, z1.h[2]
-; CHECK-NEXT:    mov z2.h, z1.h[1]
-; CHECK-NEXT:    fcvtzs x14, h1
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    fcvtzs x12, h5
-; CHECK-NEXT:    fcvtzs x13, h6
-; CHECK-NEXT:    fcvtzs x15, h2
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    fmov d1, x12
-; CHECK-NEXT:    fmov d4, x13
-; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
-; CHECK-NEXT:    fmov d3, x14
-; CHECK-NEXT:    zip1 z1.d, z4.d, z1.d
-; CHECK-NEXT:    fmov d4, x15
-; CHECK-NEXT:    stp q2, q0, [x1]
-; CHECK-NEXT:    zip1 z3.d, z3.d, z4.d
-; CHECK-NEXT:    stp q3, q1, [x1, #32]
+; CHECK-NEXT:    mov z7.h, z1.h[1]
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT:    zip1 z2.d, z3.d, z2.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z4.d
+; CHECK-NEXT:    fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT:    fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT:    stp q0, q2, [x1]
+; CHECK-NEXT:    zip1 z3.d, z6.d, z5.d
+; CHECK-NEXT:    zip1 z1.d, z1.d, z7.d
+; CHECK-NEXT:    stp q1, q3, [x1, #32]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64:
@@ -2322,67 +2291,54 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
 define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v16f16_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    mov z3.d, z0.d
-; CHECK-NEXT:    mov z5.d, z1.d
-; CHECK-NEXT:    mov z2.h, z0.h[3]
-; CHECK-NEXT:    mov z4.h, z1.h[1]
-; CHECK-NEXT:    mov z6.h, z1.h[3]
-; CHECK-NEXT:    fcvtzs x9, h1
-; CHECK-NEXT:    fcvtzs x8, h0
-; CHECK-NEXT:    mov z7.h, z0.h[1]
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z5.b, z5.b, z1.b, #8
-; CHECK-NEXT:    fcvtzs x10, h2
-; CHECK-NEXT:    fcvtzs x11, h4
-; CHECK-NEXT:    fcvtzs x12, h6
-; CHECK-NEXT:    mov z1.h, z1.h[2]
-; CHECK-NEXT:    mov z0.h, z0.h[2]
-; CHECK-NEXT:    fmov d16, x9
-; CHECK-NEXT:    mov z2.h, z3.h[3]
-; CHECK-NEXT:    mov z4.h, z5.h[3]
-; CHECK-NEXT:    fcvtzs x14, h3
-; CHECK-NEXT:    fcvtzs x13, h1
-; CHECK-NEXT:    fcvtzs x15, h5
-; CHECK-NEXT:    mov z1.h, z3.h[1]
-; CHECK-NEXT:    mov z6.h, z5.h[1]
-; CHECK-NEXT:    mov z5.h, z5.h[2]
-; CHECK-NEXT:    mov z3.h, z3.h[2]
-; CHECK-NEXT:    fcvtzs x9, h2
-; CHECK-NEXT:    fmov d2, x10
-; CHECK-NEXT:    fcvtzs x10, h4
-; CHECK-NEXT:    fmov d4, x11
-; CHECK-NEXT:    fcvtzs x11, h7
-; CHECK-NEXT:    fmov d7, x12
-; CHECK-NEXT:    fcvtzs x12, h0
-; CHECK-NEXT:    fmov d0, x13
-; CHECK-NEXT:    fcvtzs x13, h1
-; CHECK-NEXT:    fmov d1, x14
-; CHECK-NEXT:    fcvtzs x14, h6
-; CHECK-NEXT:    fmov d6, x15
-; CHECK-NEXT:    fcvtzs x15, h5
-; CHECK-NEXT:    fmov d5, x9
-; CHECK-NEXT:    fcvtzs x9, h3
-; CHECK-NEXT:    zip1 z4.d, z16.d, z4.d
-; CHECK-NEXT:    fmov d16, x8
-; CHECK-NEXT:    zip1 z0.d, z0.d, z7.d
-; CHECK-NEXT:    fmov d3, x12
-; CHECK-NEXT:    fmov d7, x10
-; CHECK-NEXT:    stp q4, q0, [x1, #64]
-; CHECK-NEXT:    fmov d0, x14
-; CHECK-NEXT:    fmov d4, x9
-; CHECK-NEXT:    zip1 z2.d, z3.d, z2.d
-; CHECK-NEXT:    fmov d3, x11
-; CHECK-NEXT:    zip1 z0.d, z6.d, z0.d
-; CHECK-NEXT:    zip1 z4.d, z4.d, z5.d
-; CHECK-NEXT:    zip1 z3.d, z16.d, z3.d
-; CHECK-NEXT:    fmov d16, x15
-; CHECK-NEXT:    stp q3, q2, [x1]
-; CHECK-NEXT:    fmov d2, x13
-; CHECK-NEXT:    zip1 z7.d, z16.d, z7.d
-; CHECK-NEXT:    zip1 z1.d, z1.d, z2.d
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    stp q1, q4, [x1, #32]
+; CHECK-NEXT:    ldp q1, q0, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z3.h, z1.h[1]
+; CHECK-NEXT:    mov z5.h, z0.h[3]
+; CHECK-NEXT:    mov z6.h, z0.h[2]
+; CHECK-NEXT:    mov z16.d, z0.d
+; CHECK-NEXT:    movprfx z2, z1
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z1.h
+; CHECK-NEXT:    mov z4.h, z1.h[3]
+; CHECK-NEXT:    mov z7.h, z1.h[2]
+; CHECK-NEXT:    mov z17.h, z0.h[1]
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT:    fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT:    fcvtzs z17.d, p0/m, z17.h
+; CHECK-NEXT:    fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT:    mov z20.h, z1.h[3]
+; CHECK-NEXT:    mov z18.h, z16.h[3]
+; CHECK-NEXT:    mov z19.h, z16.h[2]
+; CHECK-NEXT:    mov z21.h, z16.h[1]
+; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
+; CHECK-NEXT:    mov z3.h, z1.h[2]
+; CHECK-NEXT:    zip1 z5.d, z6.d, z5.d
+; CHECK-NEXT:    mov z6.h, z1.h[1]
+; CHECK-NEXT:    zip1 z0.d, z0.d, z17.d
+; CHECK-NEXT:    fcvtzs z16.d, p0/m, z16.h
+; CHECK-NEXT:    fcvtzs z18.d, p0/m, z18.h
+; CHECK-NEXT:    movprfx z17, z21
+; CHECK-NEXT:    fcvtzs z17.d, p0/m, z21.h
+; CHECK-NEXT:    fcvtzs z19.d, p0/m, z19.h
+; CHECK-NEXT:    zip1 z4.d, z7.d, z4.d
+; CHECK-NEXT:    movprfx z7, z20
+; CHECK-NEXT:    fcvtzs z7.d, p0/m, z20.h
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT:    stp q0, q5, [x1, #64]
+; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT:    zip1 z0.d, z19.d, z18.d
+; CHECK-NEXT:    zip1 z5.d, z16.d, z17.d
+; CHECK-NEXT:    stp q2, q4, [x1]
+; CHECK-NEXT:    zip1 z2.d, z3.d, z7.d
+; CHECK-NEXT:    zip1 z1.d, z1.d, z6.d
+; CHECK-NEXT:    stp q5, q0, [x1, #96]
+; CHECK-NEXT:    stp q1, q2, [x1, #32]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index e595686cb4975d..d61f92b4062944 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -1142,10 +1142,9 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    ucvtf h0, x8
-; CHECK-NEXT:    ucvtf h1, x9
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -2596,10 +2595,9 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    scvtf h0, x8
-; CHECK-NEXT:    scvtf h1, x9
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -2795,7 +2793,10 @@ define half @scvtf_i16_f16(ptr %0) {
 ; CHECK-LABEL: scvtf_i16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrsh w8, [x0]
-; CHECK-NEXT:    scvtf h0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i16_f16:
@@ -2813,7 +2814,10 @@ define float @scvtf_i16_f32(ptr %0) {
 ; CHECK-LABEL: scvtf_i16_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrsh w8, [x0]
-; CHECK-NEXT:    scvtf s0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i16_f32:
@@ -2846,8 +2850,10 @@ define double @scvtf_i16_f64(ptr %0) {
 define half @scvtf_i32_f16(ptr %0) {
 ; CHECK-LABEL: scvtf_i32_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    scvtf h0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i32_f16:
@@ -2864,8 +2870,10 @@ define half @scvtf_i32_f16(ptr %0) {
 define float @scvtf_i32_f32(ptr %0) {
 ; CHECK-LABEL: scvtf_i32_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    scvtf s0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i32_f32:
@@ -2898,8 +2906,10 @@ define double @scvtf_i32_f64(ptr %0) {
 define half @scvtf_i64_f16(ptr %0) {
 ; CHECK-LABEL: scvtf_i64_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    scvtf h0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i64_f16:
@@ -2916,8 +2926,10 @@ define half @scvtf_i64_f16(ptr %0) {
 define float @scvtf_i64_f32(ptr %0) {
 ; CHECK-LABEL: scvtf_i64_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i64_f32:
@@ -2933,8 +2945,10 @@ define float @scvtf_i64_f32(ptr %0) {
 define double @scvtf_i64_f64(ptr %0) {
 ; CHECK-LABEL: scvtf_i64_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    scvtf d0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_i64_f64:
@@ -2951,7 +2965,10 @@ define half @ucvtf_i16_f16(ptr %0) {
 ; CHECK-LABEL: ucvtf_i16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ucvtf h0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i16_f16:
@@ -2969,7 +2986,10 @@ define float @ucvtf_i16_f32(ptr %0) {
 ; CHECK-LABEL: ucvtf_i16_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i16_f32:
@@ -3002,8 +3022,10 @@ define double @ucvtf_i16_f64(ptr %0) {
 define half @ucvtf_i32_f16(ptr %0) {
 ; CHECK-LABEL: ucvtf_i32_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    ucvtf h0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i32_f16:
@@ -3020,8 +3042,10 @@ define half @ucvtf_i32_f16(ptr %0) {
 define float @ucvtf_i32_f32(ptr %0) {
 ; CHECK-LABEL: ucvtf_i32_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i32_f32:
@@ -3054,8 +3078,10 @@ define double @ucvtf_i32_f64(ptr %0) {
 define half @ucvtf_i64_f16(ptr %0) {
 ; CHECK-LABEL: ucvtf_i64_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ucvtf h0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i64_f16:
@@ -3072,8 +3098,10 @@ define half @ucvtf_i64_f16(ptr %0) {
 define float @ucvtf_i64_f32(ptr %0) {
 ; CHECK-LABEL: ucvtf_i64_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i64_f32:
@@ -3089,8 +3117,10 @@ define float @ucvtf_i64_f32(ptr %0) {
 define double @ucvtf_i64_f64(ptr %0) {
 ; CHECK-LABEL: ucvtf_i64_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i64_f64:



More information about the llvm-commits mailing list