[llvm] [AArch64] Set MaxInterleaving to 4 for Neoverse V2 (PR #100385)

Wed Jul 24 07:10:43 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-transforms

Author: Sjoerd Meijer (sjoerdmeijer)

<details>
<summary>Changes</summary>

This helps loop based workloads and benchmarks quite a lot, SPEC INT is unaffected.

---
Full diff: https://github.com/llvm/llvm-project/pull/100385.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+3-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll (+1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll (+1) 


``````````diff

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32a355fe38f1c..280083ae824cd 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -233,9 +233,11 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
     PrefLoopAlignment = Align(32);
     MaxBytesForLoopAlignment = 16;
     break;
+  case NeoverseV2:
+    MaxInterleaveFactor = 4;
+    LLVM_FALLTHROUGH;
   case NeoverseN2:
   case NeoverseN3:
-  case NeoverseV2:
   case NeoverseV3:
     PrefFunctionAlignment = Align(16);
     PrefLoopAlignment = Align(32);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll
index 0e54bd15e5ea5..6a47e0ea34a50 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll
@@ -5,6 +5,7 @@
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a14 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a15 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a16 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
+; RUN: opt -passes=loop-vectorize -mtriple=arm64 -mcpu=neoverse-v2 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
 
 ; Tests for selecting interleave counts for loops with loads and stores.
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
index 72d528d8748ba..9211cdfaebd15 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
@@ -5,6 +5,7 @@
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a14 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a15 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a16 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
+; RUN: opt -passes=loop-vectorize -mtriple=arm64 -mcpu=neoverse-v2 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
 
 ; Tests for selecting the interleave count for loops with reductions.
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/100385