[llvm] [AArch64][SVE] Use SVE for scalar FP converts in streaming[-compatible] functions (1/n) (PR #118505)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 06:38:40 PST 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/118505
>From 9441ab2ceea90abab9a927f2ebf668fde0ee5e6b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 3 Dec 2024 15:47:10 +0000
Subject: [PATCH 1/3] [AArch64][SVE] Use SVE for scalar FP converts in
streaming[-compatible] functions (1/n)
In streaming[-compatible] functions, use SVE for scalar FP conversions
to/from integer types. This can help avoid moves between FPRs and GPRs,
which could be costly.
This patch also updates definitions of SCVTF_ZPmZ_StoD and
UCVTF_ZPmZ_StoD to disallow lowering to them from ISD nodes, as doing
so requires creating a [U|S]INT_TO_FP_MERGE_PASSTHRU node with
inconsistent types.
Follow up to #112213.
Note: This PR does not include support for f64 <-> i32 conversions
(like #112564), which needs a bit more work to support.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 65 ++++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 4 +-
.../sve-streaming-mode-cvt-fp-int-fp.ll | 94 ++++-
.../sve-streaming-mode-cvt-fp-to-int.ll | 252 +++++++++++++
.../sve-streaming-mode-cvt-int-to-fp.ll | 252 +++++++++++++
...e-streaming-mode-fixed-length-fp-to-int.ll | 356 ++++++++----------
...e-streaming-mode-fixed-length-int-to-fp.ll | 94 +++--
7 files changed, 864 insertions(+), 253 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c19265613c706d..16d077899f27a0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19083,6 +19083,65 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
return SDValue();
}
+/// Tries to replace scalar FP <-> INT conversions with SVE in streaming
+/// functions, this can help to reduce the number of fmovs to/from GPRs.
+static SDValue
+tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (N->isStrictFPOpcode())
+ return SDValue();
+
+ if (!Subtarget->isSVEorStreamingSVEAvailable() ||
+ (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
+ return SDValue();
+
+ auto isSupportedType = [](EVT VT) {
+ if (!VT.isSimple())
+ return false;
+ // There are SVE instructions that can convert to/from all pairs of these
+ // int and float types. Note: We don't bother with i8 or i16 as those are
+ // illegal types for scalars.
+ return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
+ VT.getSimpleVT().SimpleTy);
+ };
+
+ if (!isSupportedType(N->getValueType(0)) ||
+ !isSupportedType(N->getOperand(0).getValueType()))
+ return SDValue();
+
+ SDValue SrcVal = N->getOperand(0);
+ EVT SrcTy = SrcVal.getValueType();
+ EVT DestTy = N->getValueType(0);
+
+ bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
+ bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
+
+ // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
+ // type (unlike the equivalent nxv2f32 for floating-point types).
+ // TODO: Support these conversions.
+ if (IsI32ToF64 || isF64ToI32)
+ return SDValue();
+
+ EVT SrcVecTy;
+ EVT DestVecTy;
+ if (DestTy.bitsGT(SrcTy)) {
+ DestVecTy = getPackedSVEVectorVT(DestTy);
+ SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
+ : DestVecTy.changeVectorElementType(SrcTy);
+ } else {
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
+ DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
+ : SrcVecTy.changeVectorElementType(DestTy);
+ }
+
+ SDLoc DL(N);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
+ SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
+}
+
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
@@ -19090,6 +19149,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
+ if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+ return Res;
+
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
@@ -19128,6 +19190,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
+ if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+ return Res;
+
if (!Subtarget->isNeonAvailable())
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4a4412f9df6a1a..47182bd1143332 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2348,8 +2348,8 @@ let Predicates = [HasSVEorSME] in {
defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
- defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 0d291e0bf07983..7c3be85ef2d9f8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,15 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
-; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -mattr=+neon < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t1:
@@ -17,6 +22,12 @@ define double @t1(double %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t1:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x8, d0
+; NONEON-NOSVE-NEXT: scvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
@@ -26,8 +37,11 @@ entry:
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t2:
@@ -35,6 +49,12 @@ define float @t2(float %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t2:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w8, s0
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
@@ -44,11 +64,20 @@ entry:
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
-; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t3:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs w8, s0
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
@@ -58,8 +87,11 @@ entry:
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t4:
@@ -67,6 +99,12 @@ define double @t4(double %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0
; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t4:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x8, d0
+; NONEON-NOSVE-NEXT: ucvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
@@ -76,8 +114,11 @@ entry:
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t5:
@@ -85,6 +126,12 @@ define float @t5(float %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t5:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w8, s0
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
@@ -94,11 +141,20 @@ entry:
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
-; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t6:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu w8, s0
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
new file mode 100644
index 00000000000000..3ae0089d409d0e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @f16_to_s32(half %x) {
+; CHECK-LABEL: f16_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi half %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f16_to_s64(half %x) {
+; CHECK-LABEL: f16_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi half %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f32_to_s32(float %x) {
+; CHECK-LABEL: f32_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi float %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f32_to_s64(float %x) {
+; CHECK-LABEL: f32_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi float %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f64_to_s32(double %x) {
+; CHECK-LABEL: f64_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi double %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f64_to_s64(double %x) {
+; CHECK-LABEL: f64_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi double %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f16_to_u32(half %x) {
+; CHECK-LABEL: f16_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui half %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f16_to_u64(half %x) {
+; CHECK-LABEL: f16_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui half %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f32_to_u32(float %x) {
+; CHECK-LABEL: f32_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui float %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f32_to_u64(float %x) {
+; CHECK-LABEL: f32_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui float %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f64_to_u32(double %x) {
+; CHECK-LABEL: f64_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui double %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f64_to_u64(double %x) {
+; CHECK-LABEL: f64_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui double %x to i64
+ ret i64 %cvt
+}
+
+define i32 @strict_convert_signed(double %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0
+ ret i32 %cvt
+}
+
+define i32 @strict_convert_unsigned(float %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
+ ret i32 %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
new file mode 100644
index 00000000000000..f30d2d578fdeb7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define half @s32_to_f16(i32 %x) {
+; CHECK-LABEL: s32_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, w0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i32 %x to half
+ ret half %cvt
+}
+
+define float @s32_to_f32(i32 %x) {
+; CHECK-LABEL: s32_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i32 %x to float
+ ret float %cvt
+}
+
+define double @s32_to_f64(i32 %x) {
+; CHECK-LABEL: s32_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, w0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s32_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf d0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i32 %x to double
+ ret double %cvt
+}
+
+define half @u32_to_f16(i32 %x) {
+; CHECK-LABEL: u32_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, w0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i32 %x to half
+ ret half %cvt
+}
+
+define float @u32_to_f32(i32 %x) {
+; CHECK-LABEL: u32_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i32 %x to float
+ ret float %cvt
+}
+
+define double @u32_to_f64(i32 %x) {
+; CHECK-LABEL: u32_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf d0, w0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u32_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf d0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i32 %x to double
+ ret double %cvt
+}
+
+define half @s64_to_f16(i64 %x) {
+; CHECK-LABEL: s64_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, x0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i64 %x to half
+ ret half %cvt
+}
+
+define float @s64_to_f32(i64 %x) {
+; CHECK-LABEL: s64_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i64 %x to float
+ ret float %cvt
+}
+
+define double @s64_to_f64(i64 %x) {
+; CHECK-LABEL: s64_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: s64_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf d0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = sitofp i64 %x to double
+ ret double %cvt
+}
+
+define half @u64_to_f16(i64 %x) {
+; CHECK-LABEL: u64_to_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f16:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, x0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i64 %x to half
+ ret half %cvt
+}
+
+define float @u64_to_f32(i64 %x) {
+; CHECK-LABEL: u64_to_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i64 %x to float
+ ret float %cvt
+}
+
+define double @u64_to_f64(i64 %x) {
+; CHECK-LABEL: u64_to_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: u64_to_f64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf d0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = uitofp i64 %x to double
+ ret double %cvt
+}
+
+define float @strict_convert_signed(i32 %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: scvtf s0, w0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %cvt
+}
+
+define float @strict_convert_unsigned(i64 %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf s0, x0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ucvtf s0, x0
+; NONEON-NOSVE-NEXT: ret
+entry:
+ %cvt = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 11fee267660c03..b61c30af379944 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -418,8 +418,10 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) {
define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
; CHECK-LABEL: fcvtzu_v1f16_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu x8, h0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64:
@@ -441,10 +443,9 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.h, z0.h[1]
-; CHECK-NEXT: fcvtzu x8, h0
-; CHECK-NEXT: fcvtzu x9, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -472,20 +473,17 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.h, z0.h[3]
; CHECK-NEXT: mov z2.h, z0.h[2]
; CHECK-NEXT: mov z3.h, z0.h[1]
-; CHECK-NEXT: fcvtzu x10, h0
-; CHECK-NEXT: fcvtzu x8, h1
-; CHECK-NEXT: fcvtzu x9, h2
-; CHECK-NEXT: fcvtzu x11, h3
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
; CHECK-NEXT: zip1 z1.d, z2.d, z1.d
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64:
@@ -522,36 +520,29 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzu_v8f16_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.h, z0.h[3]
; CHECK-NEXT: mov z3.h, z0.h[2]
; CHECK-NEXT: mov z4.h, z0.h[1]
-; CHECK-NEXT: fcvtzu x10, h0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: fcvtzu x8, h2
-; CHECK-NEXT: fcvtzu x9, h3
-; CHECK-NEXT: fcvtzu x11, h4
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.h
; CHECK-NEXT: mov z5.h, z1.h[3]
; CHECK-NEXT: mov z6.h, z1.h[2]
-; CHECK-NEXT: mov z2.h, z1.h[1]
-; CHECK-NEXT: fcvtzu x14, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzu x12, h5
-; CHECK-NEXT: fcvtzu x13, h6
-; CHECK-NEXT: fcvtzu x15, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d4, x13
-; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
-; CHECK-NEXT: fmov d3, x14
-; CHECK-NEXT: zip1 z1.d, z4.d, z1.d
-; CHECK-NEXT: fmov d4, x15
-; CHECK-NEXT: stp q2, q0, [x1]
-; CHECK-NEXT: zip1 z3.d, z3.d, z4.d
-; CHECK-NEXT: stp q3, q1, [x1, #32]
+; CHECK-NEXT: mov z7.h, z1.h[1]
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
+; CHECK-NEXT: zip1 z0.d, z0.d, z4.d
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.h
+; CHECK-NEXT: stp q0, q2, [x1]
+; CHECK-NEXT: zip1 z3.d, z6.d, z5.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z7.d
+; CHECK-NEXT: stp q1, q3, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64:
@@ -604,67 +595,54 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z5.d, z1.d
-; CHECK-NEXT: mov z2.h, z0.h[3]
-; CHECK-NEXT: mov z4.h, z1.h[1]
-; CHECK-NEXT: mov z6.h, z1.h[3]
-; CHECK-NEXT: fcvtzu x9, h1
-; CHECK-NEXT: fcvtzu x8, h0
-; CHECK-NEXT: mov z7.h, z0.h[1]
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8
-; CHECK-NEXT: fcvtzu x10, h2
-; CHECK-NEXT: fcvtzu x11, h4
-; CHECK-NEXT: fcvtzu x12, h6
-; CHECK-NEXT: mov z1.h, z1.h[2]
-; CHECK-NEXT: mov z0.h, z0.h[2]
-; CHECK-NEXT: fmov d16, x9
-; CHECK-NEXT: mov z2.h, z3.h[3]
-; CHECK-NEXT: mov z4.h, z5.h[3]
-; CHECK-NEXT: fcvtzu x14, h3
-; CHECK-NEXT: fcvtzu x13, h1
-; CHECK-NEXT: fcvtzu x15, h5
-; CHECK-NEXT: mov z1.h, z3.h[1]
-; CHECK-NEXT: mov z6.h, z5.h[1]
-; CHECK-NEXT: mov z5.h, z5.h[2]
-; CHECK-NEXT: mov z3.h, z3.h[2]
-; CHECK-NEXT: fcvtzu x9, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fcvtzu x10, h4
-; CHECK-NEXT: fmov d4, x11
-; CHECK-NEXT: fcvtzu x11, h7
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzu x12, h0
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fcvtzu x13, h1
-; CHECK-NEXT: fmov d1, x14
-; CHECK-NEXT: fcvtzu x14, h6
-; CHECK-NEXT: fmov d6, x15
-; CHECK-NEXT: fcvtzu x15, h5
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: fcvtzu x9, h3
-; CHECK-NEXT: zip1 z4.d, z16.d, z4.d
-; CHECK-NEXT: fmov d16, x8
-; CHECK-NEXT: zip1 z0.d, z0.d, z7.d
-; CHECK-NEXT: fmov d3, x12
-; CHECK-NEXT: fmov d7, x10
-; CHECK-NEXT: stp q4, q0, [x1, #64]
-; CHECK-NEXT: fmov d0, x14
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: zip1 z0.d, z6.d, z0.d
-; CHECK-NEXT: zip1 z4.d, z4.d, z5.d
-; CHECK-NEXT: zip1 z3.d, z16.d, z3.d
-; CHECK-NEXT: fmov d16, x15
-; CHECK-NEXT: stp q3, q2, [x1]
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: zip1 z7.d, z16.d, z7.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z2.d
-; CHECK-NEXT: stp q0, q7, [x1, #96]
-; CHECK-NEXT: stp q1, q4, [x1, #32]
+; CHECK-NEXT: ldp q1, q0, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.h, z1.h[1]
+; CHECK-NEXT: mov z5.h, z0.h[3]
+; CHECK-NEXT: mov z6.h, z0.h[2]
+; CHECK-NEXT: mov z16.d, z0.d
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z1.h
+; CHECK-NEXT: mov z4.h, z1.h[3]
+; CHECK-NEXT: mov z7.h, z1.h[2]
+; CHECK-NEXT: mov z17.h, z0.h[1]
+; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.h
+; CHECK-NEXT: fcvtzu z17.d, p0/m, z17.h
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.h
+; CHECK-NEXT: mov z20.h, z1.h[3]
+; CHECK-NEXT: mov z18.h, z16.h[3]
+; CHECK-NEXT: mov z19.h, z16.h[2]
+; CHECK-NEXT: mov z21.h, z16.h[1]
+; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
+; CHECK-NEXT: mov z3.h, z1.h[2]
+; CHECK-NEXT: zip1 z5.d, z6.d, z5.d
+; CHECK-NEXT: mov z6.h, z1.h[1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z17.d
+; CHECK-NEXT: fcvtzu z16.d, p0/m, z16.h
+; CHECK-NEXT: fcvtzu z18.d, p0/m, z18.h
+; CHECK-NEXT: movprfx z17, z21
+; CHECK-NEXT: fcvtzu z17.d, p0/m, z21.h
+; CHECK-NEXT: fcvtzu z19.d, p0/m, z19.h
+; CHECK-NEXT: zip1 z4.d, z7.d, z4.d
+; CHECK-NEXT: movprfx z7, z20
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z20.h
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h
+; CHECK-NEXT: stp q0, q5, [x1, #64]
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h
+; CHECK-NEXT: zip1 z0.d, z19.d, z18.d
+; CHECK-NEXT: zip1 z5.d, z16.d, z17.d
+; CHECK-NEXT: stp q2, q4, [x1]
+; CHECK-NEXT: zip1 z2.d, z3.d, z7.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z6.d
+; CHECK-NEXT: stp q5, q0, [x1, #96]
+; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64:
@@ -2135,8 +2113,10 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) {
define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
; CHECK-LABEL: fcvtzs_v1f16_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64:
@@ -2159,10 +2139,9 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.h, z0.h[1]
-; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: fcvtzs x9, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -2190,20 +2169,17 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzs_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.h, z0.h[3]
; CHECK-NEXT: mov z2.h, z0.h[2]
; CHECK-NEXT: mov z3.h, z0.h[1]
-; CHECK-NEXT: fcvtzs x10, h0
-; CHECK-NEXT: fcvtzs x8, h1
-; CHECK-NEXT: fcvtzs x9, h2
-; CHECK-NEXT: fcvtzs x11, h3
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
; CHECK-NEXT: zip1 z1.d, z2.d, z1.d
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64:
@@ -2240,36 +2216,29 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzs_v8f16_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.h, z0.h[3]
; CHECK-NEXT: mov z3.h, z0.h[2]
; CHECK-NEXT: mov z4.h, z0.h[1]
-; CHECK-NEXT: fcvtzs x10, h0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: fcvtzs x8, h2
-; CHECK-NEXT: fcvtzs x9, h3
-; CHECK-NEXT: fcvtzs x11, h4
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
; CHECK-NEXT: mov z5.h, z1.h[3]
; CHECK-NEXT: mov z6.h, z1.h[2]
-; CHECK-NEXT: mov z2.h, z1.h[1]
-; CHECK-NEXT: fcvtzs x14, h1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzs x12, h5
-; CHECK-NEXT: fcvtzs x13, h6
-; CHECK-NEXT: fcvtzs x15, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: fmov d4, x13
-; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
-; CHECK-NEXT: fmov d3, x14
-; CHECK-NEXT: zip1 z1.d, z4.d, z1.d
-; CHECK-NEXT: fmov d4, x15
-; CHECK-NEXT: stp q2, q0, [x1]
-; CHECK-NEXT: zip1 z3.d, z3.d, z4.d
-; CHECK-NEXT: stp q3, q1, [x1, #32]
+; CHECK-NEXT: mov z7.h, z1.h[1]
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
+; CHECK-NEXT: zip1 z0.d, z0.d, z4.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT: stp q0, q2, [x1]
+; CHECK-NEXT: zip1 z3.d, z6.d, z5.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z7.d
+; CHECK-NEXT: stp q1, q3, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64:
@@ -2322,67 +2291,54 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z5.d, z1.d
-; CHECK-NEXT: mov z2.h, z0.h[3]
-; CHECK-NEXT: mov z4.h, z1.h[1]
-; CHECK-NEXT: mov z6.h, z1.h[3]
-; CHECK-NEXT: fcvtzs x9, h1
-; CHECK-NEXT: fcvtzs x8, h0
-; CHECK-NEXT: mov z7.h, z0.h[1]
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8
-; CHECK-NEXT: fcvtzs x10, h2
-; CHECK-NEXT: fcvtzs x11, h4
-; CHECK-NEXT: fcvtzs x12, h6
-; CHECK-NEXT: mov z1.h, z1.h[2]
-; CHECK-NEXT: mov z0.h, z0.h[2]
-; CHECK-NEXT: fmov d16, x9
-; CHECK-NEXT: mov z2.h, z3.h[3]
-; CHECK-NEXT: mov z4.h, z5.h[3]
-; CHECK-NEXT: fcvtzs x14, h3
-; CHECK-NEXT: fcvtzs x13, h1
-; CHECK-NEXT: fcvtzs x15, h5
-; CHECK-NEXT: mov z1.h, z3.h[1]
-; CHECK-NEXT: mov z6.h, z5.h[1]
-; CHECK-NEXT: mov z5.h, z5.h[2]
-; CHECK-NEXT: mov z3.h, z3.h[2]
-; CHECK-NEXT: fcvtzs x9, h2
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fcvtzs x10, h4
-; CHECK-NEXT: fmov d4, x11
-; CHECK-NEXT: fcvtzs x11, h7
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, h0
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fcvtzs x13, h1
-; CHECK-NEXT: fmov d1, x14
-; CHECK-NEXT: fcvtzs x14, h6
-; CHECK-NEXT: fmov d6, x15
-; CHECK-NEXT: fcvtzs x15, h5
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: fcvtzs x9, h3
-; CHECK-NEXT: zip1 z4.d, z16.d, z4.d
-; CHECK-NEXT: fmov d16, x8
-; CHECK-NEXT: zip1 z0.d, z0.d, z7.d
-; CHECK-NEXT: fmov d3, x12
-; CHECK-NEXT: fmov d7, x10
-; CHECK-NEXT: stp q4, q0, [x1, #64]
-; CHECK-NEXT: fmov d0, x14
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: zip1 z2.d, z3.d, z2.d
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: zip1 z0.d, z6.d, z0.d
-; CHECK-NEXT: zip1 z4.d, z4.d, z5.d
-; CHECK-NEXT: zip1 z3.d, z16.d, z3.d
-; CHECK-NEXT: fmov d16, x15
-; CHECK-NEXT: stp q3, q2, [x1]
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: zip1 z7.d, z16.d, z7.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z2.d
-; CHECK-NEXT: stp q0, q7, [x1, #96]
-; CHECK-NEXT: stp q1, q4, [x1, #32]
+; CHECK-NEXT: ldp q1, q0, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.h, z1.h[1]
+; CHECK-NEXT: mov z5.h, z0.h[3]
+; CHECK-NEXT: mov z6.h, z0.h[2]
+; CHECK-NEXT: mov z16.d, z0.d
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h
+; CHECK-NEXT: mov z4.h, z1.h[3]
+; CHECK-NEXT: mov z7.h, z1.h[2]
+; CHECK-NEXT: mov z17.h, z0.h[1]
+; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z17.h
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT: mov z20.h, z1.h[3]
+; CHECK-NEXT: mov z18.h, z16.h[3]
+; CHECK-NEXT: mov z19.h, z16.h[2]
+; CHECK-NEXT: mov z21.h, z16.h[1]
+; CHECK-NEXT: zip1 z2.d, z2.d, z3.d
+; CHECK-NEXT: mov z3.h, z1.h[2]
+; CHECK-NEXT: zip1 z5.d, z6.d, z5.d
+; CHECK-NEXT: mov z6.h, z1.h[1]
+; CHECK-NEXT: zip1 z0.d, z0.d, z17.d
+; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.h
+; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.h
+; CHECK-NEXT: movprfx z17, z21
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z21.h
+; CHECK-NEXT: fcvtzs z19.d, p0/m, z19.h
+; CHECK-NEXT: zip1 z4.d, z7.d, z4.d
+; CHECK-NEXT: movprfx z7, z20
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z20.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: stp q0, q5, [x1, #64]
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h
+; CHECK-NEXT: zip1 z0.d, z19.d, z18.d
+; CHECK-NEXT: zip1 z5.d, z16.d, z17.d
+; CHECK-NEXT: stp q2, q4, [x1]
+; CHECK-NEXT: zip1 z2.d, z3.d, z7.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z6.d
+; CHECK-NEXT: stp q5, q0, [x1, #96]
+; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index e595686cb4975d..d61f92b4062944 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -1142,10 +1142,9 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, z0.d[1]
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: ucvtf h0, x8
-; CHECK-NEXT: ucvtf h1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2596,10 +2595,9 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, z0.d[1]
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: scvtf h0, x8
-; CHECK-NEXT: scvtf h1, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: scvtf z1.h, p0/m, z1.d
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2795,7 +2793,10 @@ define half @scvtf_i16_f16(ptr %0) {
; CHECK-LABEL: scvtf_i16_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i16_f16:
@@ -2813,7 +2814,10 @@ define float @scvtf_i16_f32(ptr %0) {
; CHECK-LABEL: scvtf_i16_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i16_f32:
@@ -2846,8 +2850,10 @@ define double @scvtf_i16_f64(ptr %0) {
define half @scvtf_i32_f16(ptr %0) {
; CHECK-LABEL: scvtf_i32_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: scvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i32_f16:
@@ -2864,8 +2870,10 @@ define half @scvtf_i32_f16(ptr %0) {
define float @scvtf_i32_f32(ptr %0) {
; CHECK-LABEL: scvtf_i32_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i32_f32:
@@ -2898,8 +2906,10 @@ define double @scvtf_i32_f64(ptr %0) {
define half @scvtf_i64_f16(ptr %0) {
; CHECK-LABEL: scvtf_i64_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: scvtf h0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i64_f16:
@@ -2916,8 +2926,10 @@ define half @scvtf_i64_f16(ptr %0) {
define float @scvtf_i64_f32(ptr %0) {
; CHECK-LABEL: scvtf_i64_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i64_f32:
@@ -2933,8 +2945,10 @@ define float @scvtf_i64_f32(ptr %0) {
define double @scvtf_i64_f64(ptr %0) {
; CHECK-LABEL: scvtf_i64_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: scvtf_i64_f64:
@@ -2951,7 +2965,10 @@ define half @ucvtf_i16_f16(ptr %0) {
; CHECK-LABEL: ucvtf_i16_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i16_f16:
@@ -2969,7 +2986,10 @@ define float @ucvtf_i16_f32(ptr %0) {
; CHECK-LABEL: ucvtf_i16_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i16_f32:
@@ -3002,8 +3022,10 @@ define double @ucvtf_i16_f64(ptr %0) {
define half @ucvtf_i32_f16(ptr %0) {
; CHECK-LABEL: ucvtf_i32_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i32_f16:
@@ -3020,8 +3042,10 @@ define half @ucvtf_i32_f16(ptr %0) {
define float @ucvtf_i32_f32(ptr %0) {
; CHECK-LABEL: ucvtf_i32_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i32_f32:
@@ -3054,8 +3078,10 @@ define double @ucvtf_i32_f64(ptr %0) {
define half @ucvtf_i64_f16(ptr %0) {
; CHECK-LABEL: ucvtf_i64_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ucvtf h0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i64_f16:
@@ -3072,8 +3098,10 @@ define half @ucvtf_i64_f16(ptr %0) {
define float @ucvtf_i64_f32(ptr %0) {
; CHECK-LABEL: ucvtf_i64_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i64_f32:
@@ -3089,8 +3117,10 @@ define float @ucvtf_i64_f32(ptr %0) {
define double @ucvtf_i64_f64(ptr %0) {
; CHECK-LABEL: ucvtf_i64_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ucvtf_i64_f64:
>From d04e63cdf9b214f8fdf73a8d614141716dbd3e29 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 5 Dec 2024 17:43:25 +0000
Subject: [PATCH 2/3] Fixups
---
.../Target/AArch64/AArch64ISelLowering.cpp | 60 ++++++++++++-------
.../sve-streaming-mode-cvt-fp-int-fp.ll | 1 -
2 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 16d077899f27a0..dc6dd40fdd9d13 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19087,69 +19087,82 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
/// functions, this can help to reduce the number of fmovs to/from GPRs.
static SDValue
tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (N->isStrictFPOpcode())
return SDValue();
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
if (!Subtarget->isSVEorStreamingSVEAvailable() ||
(!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
return SDValue();
auto isSupportedType = [](EVT VT) {
- if (!VT.isSimple())
- return false;
- // There are SVE instructions that can convert to/from all pairs of these
- // int and float types. Note: We don't bother with i8 or i16 as those are
- // illegal types for scalars.
- return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
- VT.getSimpleVT().SimpleTy);
+ return VT != MVT::bf16 && VT != MVT::f128;
};
if (!isSupportedType(N->getValueType(0)) ||
!isSupportedType(N->getOperand(0).getValueType()))
return SDValue();
+ // Look through fp_extends to avoid extra fcvts.
SDValue SrcVal = N->getOperand(0);
+ if (SrcVal->getOpcode() == ISD::FP_EXTEND &&
+ isSupportedType(SrcVal->getOperand(0).getValueType()))
+ SrcVal = SrcVal->getOperand(0);
+
EVT SrcTy = SrcVal.getValueType();
EVT DestTy = N->getValueType(0);
- bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
- bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
-
- // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
- // type (unlike the equivalent nxv2f32 for floating-point types).
- // TODO: Support these conversations.
- if (IsI32ToF64 || isF64ToI32)
- return SDValue();
+ // Merge in any subsequent fp_round to avoid extra fcvts.
+ SDNode *FPRoundNode = nullptr;
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND &&
+ isSupportedType(N->use_begin()->getValueType(0))) {
+ FPRoundNode = *N->use_begin();
+ DestTy = FPRoundNode->getValueType(0);
+ }
EVT SrcVecTy;
EVT DestVecTy;
if (DestTy.bitsGT(SrcTy)) {
DestVecTy = getPackedSVEVectorVT(DestTy);
- SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
- : DestVecTy.changeVectorElementType(SrcTy);
+ SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
} else {
SrcVecTy = getPackedSVEVectorVT(SrcTy);
- DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
- : SrcVecTy.changeVectorElementType(DestTy);
+ DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
}
+ // Ensure the resulting src/dest vector type is legal.
+ if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
+ return SDValue();
+
SDLoc DL(N);
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
+ SDValue Scalar =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
+
+ if (FPRoundNode) {
+ DAG.ReplaceAllUsesWith(SDValue(FPRoundNode, 0), Scalar);
+ return SDValue();
+ }
+ return Scalar;
}
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
- if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+ if (SDValue Res =
+ tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
return Res;
EVT VT = N->getValueType(0);
@@ -19190,7 +19203,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
- if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+ if (SDValue Res =
+ tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
return Res;
if (!Subtarget->isNeonAvailable())
@@ -26273,7 +26287,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- return performIntToFpCombine(N, DAG, Subtarget);
+ return performIntToFpCombine(N, DAG, DCI, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT_SAT:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 7c3be85ef2d9f8..f4ae66a3b2259b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -2,7 +2,6 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
-; RUN: llc -mattr=+neon < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
>From f6527548acb03b94c1c648927ff8810e20482555 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 16 Dec 2024 14:23:52 +0000
Subject: [PATCH 3/3] Fixups
---
.../lib/Target/AArch64/AArch64ISelLowering.cpp | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc6dd40fdd9d13..5da2c60f5ed9dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19107,23 +19107,10 @@ tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
!isSupportedType(N->getOperand(0).getValueType()))
return SDValue();
- // Look through fp_extends to avoid extra fcvts.
SDValue SrcVal = N->getOperand(0);
- if (SrcVal->getOpcode() == ISD::FP_EXTEND &&
- isSupportedType(SrcVal->getOperand(0).getValueType()))
- SrcVal = SrcVal->getOperand(0);
-
EVT SrcTy = SrcVal.getValueType();
EVT DestTy = N->getValueType(0);
- // Merge in any subsequent fp_round to avoid extra fcvts.
- SDNode *FPRoundNode = nullptr;
- if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND &&
- isSupportedType(N->use_begin()->getValueType(0))) {
- FPRoundNode = *N->use_begin();
- DestTy = FPRoundNode->getValueType(0);
- }
-
EVT SrcVecTy;
EVT DestVecTy;
if (DestTy.bitsGT(SrcTy)) {
@@ -19145,11 +19132,6 @@ tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
SDValue Scalar =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
-
- if (FPRoundNode) {
- DAG.ReplaceAllUsesWith(SDValue(FPRoundNode, 0), Scalar);
- return SDValue();
- }
return Scalar;
}
More information about the llvm-commits
mailing list