[llvm] cd0373e - [AArch64] Allow single-element vector FP converts with +sme2p2 (#112905)

Thu Oct 24 03:21:21 PDT 2024

Author: Benjamin Maxwell
Date: 2024-10-24T11:21:17+01:00
New Revision: cd0373e029fdca7d6d99677b805e728016574a1f

URL: https://github.com/llvm/llvm-project/commit/cd0373e029fdca7d6d99677b805e728016574a1f
DIFF: https://github.com/llvm/llvm-project/commit/cd0373e029fdca7d6d99677b805e728016574a1f.diff

LOG: [AArch64] Allow single-element vector FP converts with +sme2p2 (#112905)

Follow up to #112213 now that the +sme2p2 feature flag has landed. The
single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are
allowed in streaming SVE mode with +sme2p2.

Reference:
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector--
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector--
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 99e3ed31643b6e..fe3c8578b52aa4 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -287,6 +287,10 @@ def HasSMEF16F16orSMEF8F16
 def HasNEONandIsStreamingSafe
     : Predicate<"Subtarget->hasNEON()">,
       AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
+// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
+def HasNEONandIsSME2p2StreamingSafe
+    : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
+    AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
 def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                  AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
 def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -6315,8 +6319,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // Some float -> int -> float conversion patterns for which we want to keep the
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
 def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
 def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6326,8 +6329,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
 def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
 def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
 def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6350,8 +6352,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
 
 // fp16: integer extraction from vector must be at least 32-bits to be legal.
 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
 def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
                 (v8i16 FPR128:$Rn), (i64 0))), i16)))),
           (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 9aadf3133ba197..f402463de7be81 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
+; RUN: llc -force-streaming-compatible -mattr=+sme2p2  < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -11,11 +12,11 @@ define double @t1(double %x) {
 ; CHECK-NEXT:    scvtf d0, x8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t1:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzs d0, d0
-; NON-STREAMING-NEXT:    scvtf d0, d0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t1:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzs d0, d0
+; USE-NEON-NO-GPRS-NEXT:    scvtf d0, d0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptosi double %x to i64
   %conv1 = sitofp i64 %conv to double
@@ -29,11 +30,11 @@ define float @t2(float %x) {
 ; CHECK-NEXT:    scvtf s0, w8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t2:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzs s0, s0
-; NON-STREAMING-NEXT:    scvtf s0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t2:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzs s0, s0
+; USE-NEON-NO-GPRS-NEXT:    scvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptosi float %x to i32
   %conv1 = sitofp i32 %conv to float
@@ -49,13 +50,13 @@ define half @t3(half %x)  {
 ; CHECK-NEXT:    fcvt h0, s0
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t3:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvt s0, h0
-; NON-STREAMING-NEXT:    fcvtzs s0, s0
-; NON-STREAMING-NEXT:    scvtf s0, s0
-; NON-STREAMING-NEXT:    fcvt h0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t3:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvt s0, h0
+; USE-NEON-NO-GPRS-NEXT:    fcvtzs s0, s0
+; USE-NEON-NO-GPRS-NEXT:    scvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    fcvt h0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptosi half %x to i32
   %conv1 = sitofp i32 %conv to half
@@ -69,11 +70,11 @@ define double @t4(double %x) {
 ; CHECK-NEXT:    ucvtf d0, x8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t4:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzu d0, d0
-; NON-STREAMING-NEXT:    ucvtf d0, d0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t4:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzu d0, d0
+; USE-NEON-NO-GPRS-NEXT:    ucvtf d0, d0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptoui double %x to i64
   %conv1 = uitofp i64 %conv to double
@@ -87,11 +88,11 @@ define float @t5(float %x) {
 ; CHECK-NEXT:    ucvtf s0, w8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t5:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzu s0, s0
-; NON-STREAMING-NEXT:    ucvtf s0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t5:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzu s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ucvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptoui float %x to i32
   %conv1 = uitofp i32 %conv to float
@@ -107,13 +108,13 @@ define half @t6(half %x)  {
 ; CHECK-NEXT:    fcvt h0, s0
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t6:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvt s0, h0
-; NON-STREAMING-NEXT:    fcvtzu s0, s0
-; NON-STREAMING-NEXT:    ucvtf s0, s0
-; NON-STREAMING-NEXT:    fcvt h0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t6:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvt s0, h0
+; USE-NEON-NO-GPRS-NEXT:    fcvtzu s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ucvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    fcvt h0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptoui half %x to i32
   %conv1 = uitofp i32 %conv to half