[llvm] Allow single-element vector FP converts with +fprcvt (PR #169692)
Amina Chabane via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 09:20:38 PST 2025
https://github.com/Amichaxx created https://github.com/llvm/llvm-project/pull/169692
None
>From 04b2e863edce75db6840ec68155dab7fed9443c1 Mon Sep 17 00:00:00 2001
From: Amichaxx <amina.chabane at arm.com>
Date: Tue, 25 Nov 2025 14:23:39 +0000
Subject: [PATCH] Allow single-element vector FP converts with +fprcvt
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 12 +-
llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 +
.../sve-streaming-mode-cvt-fp-int-fp.ll | 218 ++++++++++++------
3 files changed, 160 insertions(+), 74 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0d63a9121310e..0fa5ca8e21b9c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -329,8 +329,8 @@ def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
-// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
+// A subset of NEON instructions are legal in Streaming SVE mode only with +fprcvt.
-def HasNEONandIsSME2p2StreamingSafe
- : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
+def HasNEONandFPRCVTIsStreamingSafe
+ : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->isFPRCVTStreamingSafe())">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
@@ -6952,7 +6952,7 @@ let HasOneUse = 1 in {
def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>;
def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>;
}
-let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
+let Predicates = [HasNEONandFPRCVTIsStreamingSafe] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))),
@@ -6962,7 +6962,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
-let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+let Predicates = [HasNEONandFPRCVTIsStreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))),
@@ -6994,7 +6994,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
// fp16: integer extraction from vector must be at least 32-bits to be legal.
// Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+let Predicates = [HasNEONandFPRCVTIsStreamingSafe, HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
@@ -7028,7 +7028,7 @@ multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
sub))>;
}
-let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+let Predicates = [HasNEONandFPRCVTIsStreamingSafe, HasFullFP16] in {
defm : UIntToFPROLoadPat<f16, i32, zextloadi8,
UCVTFv1i16, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f16 (uint_to_fp (i32
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 8974965c41fe3..9edb524b16f18 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -217,6 +217,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
return isSVEAvailable() || (isSVEorStreamingSVEAvailable() && hasSME2());
}
+ bool isFPRCVTStreamingSafe() const {
+ return hasFPRCVT() && (!hasSMEFA64() && (isStreaming() || isStreamingCompatible()));
+ }
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
// we don't yet support streaming-compatible codegen support that we trust
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 4ad5b38b256fe..5bb7674c684ed 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,19 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible -mattr=+fprcvt,+fullfp16 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -force-streaming-compatible -mattr=+fprcvt,+fullfp16,-neon < %s | FileCheck %s --check-prefix=NO-NEON
+; RUN: llc < %s | FileCheck %s --check-prefix=CHECK-NO-STREAMING
target triple = "aarch64-unknown-linux-gnu"
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: scvtf d0, x8
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t1:
@@ -22,11 +19,27 @@ define double @t1(double %x) {
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
;
-; NONEON-NOSVE-LABEL: t1:
-; NONEON-NOSVE: // %bb.0: // %entry
-; NONEON-NOSVE-NEXT: fcvtzs x8, d0
-; NONEON-NOSVE-NEXT: scvtf d0, x8
-; NONEON-NOSVE-NEXT: ret
+; NO-NEON-LABEL: t1:
+; NO-NEON: // %bb.0: // %entry
+; NO-NEON-NEXT: fcvtzs x8, d0
+; NO-NEON-NEXT: scvtf d0, x8
+; NO-NEON-NEXT: ret
+;
+; CHECK-NO-STREAMING-LABEL: t1:
+; CHECK-NO-STREAMING: // %bb.0: // %entry
+; CHECK-NO-STREAMING-NEXT: fcvtzs d0, d0
+; CHECK-NO-STREAMING-NEXT: scvtf d0, d0
+; CHECK-NO-STREAMING-NEXT: ret
+; NO-NEON-NO-GPRS-LABEL: t1:
+; NO-NEON-NO-GPRS: // %bb.0: // %entry
+; NO-NEON-NO-GPRS-NEXT: fcvtzs x8, d0
+; NO-NEON-NO-GPRS-NEXT: scvtf d0, x8
+; NO-NEON-NO-GPRS-NEXT: ret
+; USE-NEON-LABEL: t1:
+; USE-NEON: // %bb.0: // %entry
+; USE-NEON-NEXT: fcvtzs d0, d0
+; USE-NEON-NEXT: scvtf d0, d0
+; USE-NEON-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
@@ -36,11 +49,8 @@ entry:
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
-; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t2:
@@ -49,11 +59,27 @@ define float @t2(float %x) {
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
;
-; NONEON-NOSVE-LABEL: t2:
-; NONEON-NOSVE: // %bb.0: // %entry
-; NONEON-NOSVE-NEXT: fcvtzs w8, s0
-; NONEON-NOSVE-NEXT: scvtf s0, w8
-; NONEON-NOSVE-NEXT: ret
+; NO-NEON-LABEL: t2:
+; NO-NEON: // %bb.0: // %entry
+; NO-NEON-NEXT: fcvtzs w8, s0
+; NO-NEON-NEXT: scvtf s0, w8
+; NO-NEON-NEXT: ret
+;
+; CHECK-NO-STREAMING-LABEL: t2:
+; CHECK-NO-STREAMING: // %bb.0: // %entry
+; CHECK-NO-STREAMING-NEXT: fcvtzs s0, s0
+; CHECK-NO-STREAMING-NEXT: scvtf s0, s0
+; CHECK-NO-STREAMING-NEXT: ret
+; NO-NEON-NO-GPRS-LABEL: t2:
+; NO-NEON-NO-GPRS: // %bb.0: // %entry
+; NO-NEON-NO-GPRS-NEXT: fcvtzs w8, s0
+; NO-NEON-NO-GPRS-NEXT: scvtf s0, w8
+; NO-NEON-NO-GPRS-NEXT: ret
+; USE-NEON-LABEL: t2:
+; USE-NEON: // %bb.0: // %entry
+; USE-NEON-NEXT: fcvtzs s0, s0
+; USE-NEON-NEXT: scvtf s0, s0
+; USE-NEON-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
@@ -63,11 +89,10 @@ entry:
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t3:
@@ -76,13 +101,29 @@ define half @t3(half %x) {
; USE-NEON-NO-GPRS-NEXT: scvtf h0, h0
; USE-NEON-NO-GPRS-NEXT: ret
;
-; NONEON-NOSVE-LABEL: t3:
-; NONEON-NOSVE: // %bb.0: // %entry
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: fcvtzs w8, s0
-; NONEON-NOSVE-NEXT: scvtf s0, w8
-; NONEON-NOSVE-NEXT: fcvt h0, s0
-; NONEON-NOSVE-NEXT: ret
+; NO-NEON-LABEL: t3:
+; NO-NEON: // %bb.0: // %entry
+; NO-NEON-NEXT: fcvtzs w8, h0
+; NO-NEON-NEXT: scvtf h0, w8
+; NO-NEON-NEXT: ret
+;
+; CHECK-NO-STREAMING-LABEL: t3:
+; CHECK-NO-STREAMING: // %bb.0: // %entry
+; CHECK-NO-STREAMING-NEXT: fcvt s0, h0
+; CHECK-NO-STREAMING-NEXT: fcvtzs s0, s0
+; CHECK-NO-STREAMING-NEXT: scvtf s0, s0
+; CHECK-NO-STREAMING-NEXT: fcvt h0, s0
+; CHECK-NO-STREAMING-NEXT: ret
+; NO-NEON-NO-GPRS-LABEL: t3:
+; NO-NEON-NO-GPRS: // %bb.0: // %entry
+; NO-NEON-NO-GPRS-NEXT: fcvtzs w8, h0
+; NO-NEON-NO-GPRS-NEXT: scvtf h0, w8
+; NO-NEON-NO-GPRS-NEXT: ret
+; USE-NEON-LABEL: t3:
+; USE-NEON: // %bb.0: // %entry
+; USE-NEON-NEXT: fcvtzs h0, h0
+; USE-NEON-NEXT: scvtf h0, h0
+; USE-NEON-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
@@ -92,11 +133,8 @@ entry:
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
-; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fcvtzu x8, d0
+; CHECK-NEXT: ucvtf d0, x8
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t4:
@@ -105,11 +143,27 @@ define double @t4(double %x) {
; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
;
-; NONEON-NOSVE-LABEL: t4:
-; NONEON-NOSVE: // %bb.0: // %entry
-; NONEON-NOSVE-NEXT: fcvtzu x8, d0
-; NONEON-NOSVE-NEXT: ucvtf d0, x8
-; NONEON-NOSVE-NEXT: ret
+; NO-NEON-LABEL: t4:
+; NO-NEON: // %bb.0: // %entry
+; NO-NEON-NEXT: fcvtzu x8, d0
+; NO-NEON-NEXT: ucvtf d0, x8
+; NO-NEON-NEXT: ret
+;
+; CHECK-NO-STREAMING-LABEL: t4:
+; CHECK-NO-STREAMING: // %bb.0: // %entry
+; CHECK-NO-STREAMING-NEXT: fcvtzu d0, d0
+; CHECK-NO-STREAMING-NEXT: ucvtf d0, d0
+; CHECK-NO-STREAMING-NEXT: ret
+; NO-NEON-NO-GPRS-LABEL: t4:
+; NO-NEON-NO-GPRS: // %bb.0: // %entry
+; NO-NEON-NO-GPRS-NEXT: fcvtzu x8, d0
+; NO-NEON-NO-GPRS-NEXT: ucvtf d0, x8
+; NO-NEON-NO-GPRS-NEXT: ret
+; USE-NEON-LABEL: t4:
+; USE-NEON: // %bb.0: // %entry
+; USE-NEON-NEXT: fcvtzu d0, d0
+; USE-NEON-NEXT: ucvtf d0, d0
+; USE-NEON-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
@@ -119,11 +173,8 @@ entry:
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
-; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: fcvtzu w8, s0
+; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t5:
@@ -132,11 +183,27 @@ define float @t5(float %x) {
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
;
-; NONEON-NOSVE-LABEL: t5:
-; NONEON-NOSVE: // %bb.0: // %entry
-; NONEON-NOSVE-NEXT: fcvtzu w8, s0
-; NONEON-NOSVE-NEXT: ucvtf s0, w8
-; NONEON-NOSVE-NEXT: ret
+; NO-NEON-LABEL: t5:
+; NO-NEON: // %bb.0: // %entry
+; NO-NEON-NEXT: fcvtzu w8, s0
+; NO-NEON-NEXT: ucvtf s0, w8
+; NO-NEON-NEXT: ret
+;
+; CHECK-NO-STREAMING-LABEL: t5:
+; CHECK-NO-STREAMING: // %bb.0: // %entry
+; CHECK-NO-STREAMING-NEXT: fcvtzu s0, s0
+; CHECK-NO-STREAMING-NEXT: ucvtf s0, s0
+; CHECK-NO-STREAMING-NEXT: ret
+; NO-NEON-NO-GPRS-LABEL: t5:
+; NO-NEON-NO-GPRS: // %bb.0: // %entry
+; NO-NEON-NO-GPRS-NEXT: fcvtzu w8, s0
+; NO-NEON-NO-GPRS-NEXT: ucvtf s0, w8
+; NO-NEON-NO-GPRS-NEXT: ret
+; USE-NEON-LABEL: t5:
+; USE-NEON: // %bb.0: // %entry
+; USE-NEON-NEXT: fcvtzu s0, s0
+; USE-NEON-NEXT: ucvtf s0, s0
+; USE-NEON-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
@@ -146,11 +213,10 @@ entry:
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
-; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzu w8, s0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
; USE-NEON-NO-GPRS-LABEL: t6:
@@ -159,13 +225,29 @@ define half @t6(half %x) {
; USE-NEON-NO-GPRS-NEXT: ucvtf h0, h0
; USE-NEON-NO-GPRS-NEXT: ret
;
-; NONEON-NOSVE-LABEL: t6:
-; NONEON-NOSVE: // %bb.0: // %entry
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: fcvtzu w8, s0
-; NONEON-NOSVE-NEXT: ucvtf s0, w8
-; NONEON-NOSVE-NEXT: fcvt h0, s0
-; NONEON-NOSVE-NEXT: ret
+; NO-NEON-LABEL: t6:
+; NO-NEON: // %bb.0: // %entry
+; NO-NEON-NEXT: fcvtzu w8, h0
+; NO-NEON-NEXT: ucvtf h0, w8
+; NO-NEON-NEXT: ret
+;
+; CHECK-NO-STREAMING-LABEL: t6:
+; CHECK-NO-STREAMING: // %bb.0: // %entry
+; CHECK-NO-STREAMING-NEXT: fcvt s0, h0
+; CHECK-NO-STREAMING-NEXT: fcvtzu s0, s0
+; CHECK-NO-STREAMING-NEXT: ucvtf s0, s0
+; CHECK-NO-STREAMING-NEXT: fcvt h0, s0
+; CHECK-NO-STREAMING-NEXT: ret
+; NO-NEON-NO-GPRS-LABEL: t6:
+; NO-NEON-NO-GPRS: // %bb.0: // %entry
+; NO-NEON-NO-GPRS-NEXT: fcvtzu w8, h0
+; NO-NEON-NO-GPRS-NEXT: ucvtf h0, w8
+; NO-NEON-NO-GPRS-NEXT: ret
+; USE-NEON-LABEL: t6:
+; USE-NEON: // %bb.0: // %entry
+; USE-NEON-NEXT: fcvtzu h0, h0
+; USE-NEON-NEXT: ucvtf h0, h0
+; USE-NEON-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
More information about the llvm-commits
mailing list