[llvm] [LoopVectorizer][AArch64] Add a -sve-vscale-for-tuning override option. (PR #156916)

Mon Sep 8 03:33:55 PDT 2025

https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/156916

>From fb034f022e5981517c7c1aa77994768b7bfc6725 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 4 Sep 2025 16:53:50 +0100
Subject: [PATCH 1/2] [LoopVectorizer] Add a -force-vscale-for-tuning override
 option.

It can be useful for debugging and tuning to be able to override
VScaleForTuning. This adds a quick option to the vectorizer to do so.
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp          | 9 +++++++++
 .../AArch64/scalable-vectorization-cost-tuning.ll        | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d78e190e8bf7b..32916492a9b41 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -294,6 +294,10 @@ static cl::opt<bool> ForceTargetSupportsScalableVectors(
         "Pretend that scalable vectors are supported, even if the target does "
         "not support them. This flag should only be used for testing."));
 
+static cl::opt<unsigned> VScaleForTuningOpt(
+    "force-vscale-for-tuning", cl::Hidden,
+    cl::desc("Force a vscale for tuning factor in the loop vectorizer"));
+
 static cl::opt<unsigned> SmallLoopCost(
     "small-loop-cost", cl::init(20), cl::Hidden,
     cl::desc(
@@ -1463,6 +1467,11 @@ class LoopVectorizationCostModel {
   /// vscale_range.min == vscale_range.max then return vscale_range.max, else
   /// return the value returned by the corresponding TTI method.
   void initializeVScaleForTuning() {
+    if (VScaleForTuningOpt.getNumOccurrences()) {
+      VScaleForTuning = VScaleForTuningOpt;
+      return;
+    }
+
     const Function *Fn = TheLoop->getHeader()->getParent();
     if (Fn->hasFnAttribute(Attribute::VScaleRange)) {
       auto Attr = Fn->getFnAttribute(Attribute::VScaleRange);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
index c4aee69db70b3..16d3786681ffa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
@@ -7,6 +7,10 @@
 ; RUN:     -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefixes=VSCALEFORTUNING1
 
+; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic -force-vscale-for-tuning=2 \
+; RUN:     -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=VSCALEFORTUNING2
+
 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 \
 ; RUN:     -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefixes=VSCALEFORTUNING2

>From bccc85bf23bea43e2c7c5b9fb1d1bf0be573a297 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 8 Sep 2025 11:32:56 +0100
Subject: [PATCH 2/2] Move to AArch64

---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp             | 6 ++++++
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp          | 9 ---------
 .../AArch64/scalable-vectorization-cost-tuning.ll        | 2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 0f4f0129e9cd3..98e0a1180510c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -91,6 +91,10 @@ static cl::opt<bool> EnableZPRPredicateSpills(
     cl::desc(
         "Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));
 
+static cl::opt<unsigned>
+    VScaleForTuningOpt("sve-vscale-for-tuning", cl::Hidden,
+                       cl::desc("Force a vscale for tuning factor for SVE"));
+
 // Subreg liveness tracking is disabled by default for now until all issues
 // are ironed out. This option allows the feature to be used in tests.
 static cl::opt<bool>
@@ -364,6 +368,8 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
 
   if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
     MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
+  if (VScaleForTuningOpt.getNumOccurrences() > 0)
+    VScaleForTuning = VScaleForTuningOpt;
 }
 
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 32916492a9b41..d78e190e8bf7b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -294,10 +294,6 @@ static cl::opt<bool> ForceTargetSupportsScalableVectors(
         "Pretend that scalable vectors are supported, even if the target does "
         "not support them. This flag should only be used for testing."));
 
-static cl::opt<unsigned> VScaleForTuningOpt(
-    "force-vscale-for-tuning", cl::Hidden,
-    cl::desc("Force a vscale for tuning factor in the loop vectorizer"));
-
 static cl::opt<unsigned> SmallLoopCost(
     "small-loop-cost", cl::init(20), cl::Hidden,
     cl::desc(
@@ -1467,11 +1463,6 @@ class LoopVectorizationCostModel {
   /// vscale_range.min == vscale_range.max then return vscale_range.max, else
   /// return the value returned by the corresponding TTI method.
   void initializeVScaleForTuning() {
-    if (VScaleForTuningOpt.getNumOccurrences()) {
-      VScaleForTuning = VScaleForTuningOpt;
-      return;
-    }
-
     const Function *Fn = TheLoop->getHeader()->getParent();
     if (Fn->hasFnAttribute(Attribute::VScaleRange)) {
       auto Attr = Fn->getFnAttribute(Attribute::VScaleRange);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
index 16d3786681ffa..c1b175f39e852 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
@@ -7,7 +7,7 @@
 ; RUN:     -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefixes=VSCALEFORTUNING1
 
-; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic -force-vscale-for-tuning=2 \
+; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic -sve-vscale-for-tuning=2 \
 ; RUN:     -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefixes=VSCALEFORTUNING2