[llvm] [AArch64] Avoid single-element vector fp converts in streaming[-compatible] functions (PR #112213)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 09:19:40 PDT 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/112213
>From 26f2eb80b8d1b42c86ff18f3b6a8369476268408 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 14 Oct 2024 13:40:59 +0000
Subject: [PATCH 1/3] [AArch64] Avoid single-element vector fp converts in
streaming[-compatible] functions
The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF
are only supported in streaming[-compatible] functions with `+sme2p2`.
Reference:
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector--
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector--
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 17 ++-
.../sve-streaming-mode-cvt-fp-int-fp.ll | 121 ++++++++++++++++++
2 files changed, 132 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 325508b62a9f14..bd9da10300c7fd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -247,6 +247,11 @@ def HasSMEF16F16orSMEF8F16
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
+// A subet of NEON instructions legal in Streaming SVE mode with +sme2p2.
+// TODO: Change to check for hasSME2p2() once FEAT_SME2p2 is implemented.
+def HasNEONandIsSME2p2StreamingSafe
+ : Predicate<"Subtarget->isNeonAvailable()">,
+ AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -6237,7 +6242,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
-let Predicates = [HasNEONandIsStreamingSafe] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6247,7 +6252,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
-let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6270,9 +6275,9 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
// fp16: integer extraction from vector must be at least 32-bits to be legal.
// Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
-def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
- (v8i16 FPR128:$Rn), (i64 0))), i16)))),
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
+ (v8i16 FPR128:$Rn), (i64 0))), i16)))),
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
// unsigned 32-bit extracted element is truncated to 16-bits using AND
@@ -6367,7 +6372,7 @@ def : Pat <(f64 (uint_to_fp (i32
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
-} // let Predicates = [HasNEONandIsStreamingSafe]
+} // let Predicates = [HasNEONandIsSME2p2StreamingSafe]
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
new file mode 100644
index 00000000000000..9aadf3133ba197
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define double @t1(double %x) {
+; CHECK-LABEL: t1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ret
+;
+; NON-STREAMING-LABEL: t1:
+; NON-STREAMING: // %bb.0: // %entry
+; NON-STREAMING-NEXT: fcvtzs d0, d0
+; NON-STREAMING-NEXT: scvtf d0, d0
+; NON-STREAMING-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %conv1 = sitofp i64 %conv to double
+ ret double %conv1
+}
+
+define float @t2(float %x) {
+; CHECK-LABEL: t2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NON-STREAMING-LABEL: t2:
+; NON-STREAMING: // %bb.0: // %entry
+; NON-STREAMING-NEXT: fcvtzs s0, s0
+; NON-STREAMING-NEXT: scvtf s0, s0
+; NON-STREAMING-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %conv1 = sitofp i32 %conv to float
+ ret float %conv1
+}
+
+define half @t3(half %x) {
+; CHECK-LABEL: t3:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+;
+; NON-STREAMING-LABEL: t3:
+; NON-STREAMING: // %bb.0: // %entry
+; NON-STREAMING-NEXT: fcvt s0, h0
+; NON-STREAMING-NEXT: fcvtzs s0, s0
+; NON-STREAMING-NEXT: scvtf s0, s0
+; NON-STREAMING-NEXT: fcvt h0, s0
+; NON-STREAMING-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %conv1 = sitofp i32 %conv to half
+ ret half %conv1
+}
+
+define double @t4(double %x) {
+; CHECK-LABEL: t4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu x8, d0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ret
+;
+; NON-STREAMING-LABEL: t4:
+; NON-STREAMING: // %bb.0: // %entry
+; NON-STREAMING-NEXT: fcvtzu d0, d0
+; NON-STREAMING-NEXT: ucvtf d0, d0
+; NON-STREAMING-NEXT: ret
+entry:
+ %conv = fptoui double %x to i64
+ %conv1 = uitofp i64 %conv to double
+ ret double %conv1
+}
+
+define float @t5(float %x) {
+; CHECK-LABEL: t5:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w8, s0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NON-STREAMING-LABEL: t5:
+; NON-STREAMING: // %bb.0: // %entry
+; NON-STREAMING-NEXT: fcvtzu s0, s0
+; NON-STREAMING-NEXT: ucvtf s0, s0
+; NON-STREAMING-NEXT: ret
+entry:
+ %conv = fptoui float %x to i32
+ %conv1 = uitofp i32 %conv to float
+ ret float %conv1
+}
+
+define half @t6(half %x) {
+; CHECK-LABEL: t6:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzu w8, s0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+;
+; NON-STREAMING-LABEL: t6:
+; NON-STREAMING: // %bb.0: // %entry
+; NON-STREAMING-NEXT: fcvt s0, h0
+; NON-STREAMING-NEXT: fcvtzu s0, s0
+; NON-STREAMING-NEXT: ucvtf s0, s0
+; NON-STREAMING-NEXT: fcvt h0, s0
+; NON-STREAMING-NEXT: ret
+entry:
+ %conv = fptoui half %x to i32
+ %conv1 = uitofp i32 %conv to half
+ ret half %conv1
+}
>From 243f6a719eab0095edc2aa388af209db161d275a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 14 Oct 2024 14:29:50 +0000
Subject: [PATCH 2/3] Fix typo
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bd9da10300c7fd..a09b2958df18cb 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -247,7 +247,7 @@ def HasSMEF16F16orSMEF8F16
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
-// A subet of NEON instructions legal in Streaming SVE mode with +sme2p2.
+// A subset of NEON instructions legal in Streaming SVE mode with +sme2p2.
// TODO: Change to check for hasSME2p2() once FEAT_SME2p2 is implemented.
def HasNEONandIsSME2p2StreamingSafe
: Predicate<"Subtarget->isNeonAvailable()">,
>From a005fce82a5deab83ee3859df4297da42e1d796e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 14 Oct 2024 16:13:28 +0000
Subject: [PATCH 3/3] Fixups
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a09b2958df18cb..f1785457be5cb9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -247,11 +247,6 @@ def HasSMEF16F16orSMEF8F16
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
-// A subset of NEON instructions legal in Streaming SVE mode with +sme2p2.
-// TODO: Change to check for hasSME2p2() once FEAT_SME2p2 is implemented.
-def HasNEONandIsSME2p2StreamingSafe
- : Predicate<"Subtarget->isNeonAvailable()">,
- AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -6242,7 +6237,8 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
-let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
+// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
+let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6252,7 +6248,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
-let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6275,7 +6271,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
// fp16: integer extraction from vector must be at least 32-bits to be legal.
// Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
@@ -6372,7 +6368,7 @@ def : Pat <(f64 (uint_to_fp (i32
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
-} // let Predicates = [HasNEONandIsSME2p2StreamingSafe]
+} // let Predicates = [HasNEON]
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
More information about the llvm-commits
mailing list