[llvm] [AArch64][SVE] Use SVE for scalar FP converts in streaming[-compatible] functions (1/n) (PR #118505)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 07:54:19 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
In streaming[-compatible] functions, use SVE for scalar FP conversions to/from integer types. This can help avoid moves between FPRs and GPRs, which could be costly.
This patch also updates definitions of SCVTF_ZPmZ_StoD and UCVTF_ZPmZ_StoD to disallow lowering to them from ISD nodes, as doing so requires creating a [U|S]INT_TO_FP_MERGE_PASSTHRU node with inconsistent types.
Follow up to #<!-- -->112213.
Note: This PR does not include support for f64 <-> i32 conversions (like #<!-- -->112564), which needs a bit more work to support.
---
Patch is 50.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118505.diff
7 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+65)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll (+75-19)
- (added) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll (+252)
- (added) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll (+252)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll (+156-200)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+62-32)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e1be825fcf7bf3..1edc39ce375937 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18989,6 +18989,65 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
return SDValue();
}
+/// Tries to replace scalar FP <-> INT conversions with SVE in streaming
+/// functions, this can help to reduce the number of fmovs to/from GPRs.
+static SDValue
+tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (N->isStrictFPOpcode())
+ return SDValue();
+
+ if (!Subtarget->isSVEorStreamingSVEAvailable() ||
+ (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
+ return SDValue();
+
+ auto isSupportedType = [](EVT VT) {
+ if (!VT.isSimple())
+ return false;
+ // There are SVE instructions that can convert to/from all pairs of these
+ // int and float types. Note: We don't bother with i8 or i16 as those are
+ // illegal types for scalars.
+ return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
+ VT.getSimpleVT().SimpleTy);
+ };
+
+ if (!isSupportedType(N->getValueType(0)) ||
+ !isSupportedType(N->getOperand(0).getValueType()))
+ return SDValue();
+
+ SDValue SrcVal = N->getOperand(0);
+ EVT SrcTy = SrcVal.getValueType();
+ EVT DestTy = N->getValueType(0);
+
+ bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
+ bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
+
+ // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
+ // type (unlike the equivalent nxv2f32 for floating-point types).
+ // TODO: Support these conversions.
+ if (IsI32ToF64 || isF64ToI32)
+ return SDValue();
+
+ EVT SrcVecTy;
+ EVT DestVecTy;
+ if (DestTy.bitsGT(SrcTy)) {
+ DestVecTy = getPackedSVEVectorVT(DestTy);
+ SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
+ : DestVecTy.changeVectorElementType(SrcTy);
+ } else {
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
+ DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
+ : SrcVecTy.changeVectorElementType(DestTy);
+ }
+
+ SDLoc DL(N);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
+ SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
+}
+
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
@@ -18996,6 +19055,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
+ if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+ return Res;
+
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
@@ -19034,6 +19096,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
+ if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
+ return Res;
+
if (!Subtarget->isNeonAvailable())
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a15e89be1a24b2..178ea149971b8f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2348,8 +2348,8 @@ let Predicates = [HasSVEorSME] in {
defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
- defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index f402463de7be81..1050dc0210a67e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,15 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
-; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -mattr=+neon < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t1:
@@ -17,6 +22,12 @@ define double @t1(double %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t1:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x8, d0
+; NONEON-NOSVE-NEXT: scvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
@@ -26,8 +37,11 @@ entry:
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t2:
@@ -35,6 +49,12 @@ define float @t2(float %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t2:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w8, s0
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
@@ -44,10 +64,11 @@ entry:
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
-; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t3:
@@ -57,6 +78,14 @@ define half @t3(half %x) {
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t3:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs w8, s0
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
@@ -66,8 +95,11 @@ entry:
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t4:
@@ -75,6 +107,12 @@ define double @t4(double %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0
; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t4:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x8, d0
+; NONEON-NOSVE-NEXT: ucvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
@@ -84,8 +122,11 @@ entry:
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t5:
@@ -93,6 +134,12 @@ define float @t5(float %x) {
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t5:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w8, s0
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
@@ -102,10 +149,11 @@ entry:
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
-; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t6:
@@ -115,6 +163,14 @@ define half @t6(half %x) {
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
; USE-NEON-NO-GPRS-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: t6:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu w8, s0
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
new file mode 100644
index 00000000000000..3ae0089d409d0e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @f16_to_s32(half %x) {
+; CHECK-LABEL: f16_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi half %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f16_to_s64(half %x) {
+; CHECK-LABEL: f16_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi half %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f32_to_s32(float %x) {
+; CHECK-LABEL: f32_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi float %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f32_to_s64(float %x) {
+; CHECK-LABEL: f32_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi float %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f64_to_s32(double %x) {
+; CHECK-LABEL: f64_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi double %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f64_to_s64(double %x) {
+; CHECK-LABEL: f64_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_s64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs x0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptosi double %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f16_to_u32(half %x) {
+; CHECK-LABEL: f16_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui half %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f16_to_u64(half %x) {
+; CHECK-LABEL: f16_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f16_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui half %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f32_to_u32(float %x) {
+; CHECK-LABEL: f32_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui float %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f32_to_u64(float %x) {
+; CHECK-LABEL: f32_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f32_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui float %x to i64
+ ret i64 %cvt
+}
+
+define i32 @f64_to_u32(double %x) {
+; CHECK-LABEL: f64_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u32:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui double %x to i32
+ ret i32 %cvt
+}
+
+define i64 @f64_to_u64(double %x) {
+; CHECK-LABEL: f64_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: f64_to_u64:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu x0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = fptoui double %x to i64
+ ret i64 %cvt
+}
+
+define i32 @strict_convert_signed(double %x) {
+; CHECK-LABEL: strict_convert_signed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w0, d0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_signed:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzs w0, d0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0
+ ret i32 %cvt
+}
+
+define i32 @strict_convert_unsigned(float %x) {
+; CHECK-LABEL: strict_convert_unsigned:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w0, s0
+; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: strict_convert_unsigned:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: fcvtzu w0, s0
+; NONEON-NOSVE-NEXT: ret
+ entry:
+ %cvt = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
+ ret i32 %cvt
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
new file mode 100644
index 00000000000000..f30d2d578fdeb7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-pr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/118505
More information about the llvm-commits
mailing list