[llvm] [AArch64] Consider streaming mode in TTI interfaces for vectorization. (PR #96305)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 06:33:14 PDT 2024


https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/96305

At the moment, vectorization is only enabled in streaming(-compatible) mode when enabled through an option. But the interfaces should check more than just 'hasSVE()', because a function with +sme in streaming mode should also vectorize with the option enabled.

Additionally, a streaming-compatible function should only be able to use fixed-length autovec if SVE is available, otherwise the vector code will be scalarised by the backend.

>From ad6b15f346b7fe35b2faebafdf3d3a11e7addeb9 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 18 Jun 2024 09:31:35 +0100
Subject: [PATCH] [AArch64] Consider streaming mode in TTI interfaces for
 vectorization.

At the moment, vectorization is only enabled in streaming(-compatible)
mode when enabled through an option. But the interfaces should check
more than just 'hasSVE()', because a function with +sme in streaming
mode should also vectorize with the option enabled.

Additionally, a streaming-compatible function should only be able to use
fixed-length autovec if SVE is available, otherwise the vector code will be
scalarised by the backend.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 21 ++++++++++---------
 .../AArch64/AArch64TargetTransformInfo.h      |  6 ++++--
 .../LoopVectorize/AArch64/sme-vectorize.ll    |  6 +++---
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 9f5756fc7e401..632cb23619ddb 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2162,19 +2162,20 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
   case TargetTransformInfo::RGK_Scalar:
     return TypeSize::getFixed(64);
   case TargetTransformInfo::RGK_FixedWidthVector:
-    if (!ST->isNeonAvailable() && !EnableFixedwidthAutovecInStreamingMode)
-      return TypeSize::getFixed(0);
-
-    if (ST->hasSVE())
+    if (ST->useSVEForFixedLengthVectors() &&
+        (ST->isSVEAvailable() || EnableFixedwidthAutovecInStreamingMode))
       return TypeSize::getFixed(
           std::max(ST->getMinSVEVectorSizeInBits(), 128u));
-
-    return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
+    else if (ST->isNeonAvailable())
+      return TypeSize::getFixed(128);
+    else
+      return TypeSize::getFixed(0);
   case TargetTransformInfo::RGK_ScalableVector:
-    if (!ST->isSVEAvailable() && !EnableScalableAutovecInStreamingMode)
+    if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
+                                 EnableScalableAutovecInStreamingMode))
+      return TypeSize::getScalable(128);
+    else
       return TypeSize::getScalable(0);
-
-    return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
   }
   llvm_unreachable("Unsupported register kind");
 }
@@ -4234,4 +4235,4 @@ bool AArch64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     C2.NumIVMuls, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
 
   return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e7d3a5899cef1..417e72da9ca10 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -373,9 +373,11 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
 
   bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
 
-  bool supportsScalableVectors() const { return ST->hasSVE(); }
+  bool supportsScalableVectors() const {
+    return ST->isSVEorStreamingSVEAvailable();
+  }
 
-  bool enableScalableVectorization() const { return ST->hasSVE(); }
+  bool enableScalableVectorization() const { return ST->isSVEAvailable(); }
 
   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                    ElementCount VF) const;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sme-vectorize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sme-vectorize.ll
index 37a09491e0ca7..8e3b55ce2646c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sme-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sme-vectorize.ll
@@ -5,9 +5,9 @@
 ; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_body/ %s | opt -mattr="+sme,+sve" -passes=loop-vectorize,slp-vectorizer -S - | FileCheck %s --check-prefix=CHECK
 
 ; __arm_streaming function, force use of scalable autovec. Should vectorize.
-; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_enabled/ %s | opt -mattr="+sme" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK
+; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_enabled/ %s | opt -mattr="+sme" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-SCALABLE
 ; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_enabled/ %s | opt -mattr="+sme,+sve" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-SCALABLE
-; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_body/ %s | opt -mattr="+sme" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK
+; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_body/ %s | opt -mattr="+sme" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-SCALABLE
 ; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_body/ %s | opt -mattr="+sme,+sve" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-SCALABLE
 
 ; __arm_streaming function, force use of fixed-width autovec. Should vectorize.
@@ -25,7 +25,7 @@
 ; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_compatible/ %s | opt -mattr="+sme,+sve" -passes=loop-vectorize,slp-vectorizer -S -enable-scalable-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-SCALABLE
 
 ; __arm_streaming_compatible function, force use of fixed-width autovec. Can only vectorize if +sve is available.
-; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_compatible/ %s | opt -mattr="+sme" -passes=loop-vectorize,slp-vectorizer -S -enable-fixedwidth-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-FIXEDWIDTH
+; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_compatible/ %s | opt -mattr="+sme" -passes=loop-vectorize,slp-vectorizer -S -enable-fixedwidth-autovec-in-streaming-mode - | FileCheck %s
 ; RUN: sed -e s/REPLACE_PSTATE_MACRO/aarch64_pstate_sm_compatible/ %s | opt -mattr="+sme,+sve" -passes=loop-vectorize,slp-vectorizer -S -enable-fixedwidth-autovec-in-streaming-mode - | FileCheck %s --check-prefix=CHECK-FORCE-FIXEDWIDTH
 
 target triple = "aarch64-unknown-linux-gnu"



More information about the llvm-commits mailing list