[llvm] [LV] Add flag to always force a scalable VF when feasible. (PR #182467)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 20 02:16:55 PST 2026
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/182467
`-scalable-vectorization=always` makes the loop vectorizer (LV) always favour a scalable VF over a fixed-width VF whenever the scalable VF has a valid cost and scalable vectorization is feasible.
This is mostly a hidden flag intended for experimentation.
>From 79d8165bbeb38fe8dca2de2c8913e1c43d9c4b54 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 19 Feb 2026 17:19:21 +0000
Subject: [PATCH] [LV] Add flag to always force a scalable VF when feasible.
Compiling with `-scalable-vectorization=always` makes the loop vectorizer
always favour a scalable VF over a fixed-width VF whenever the scalable VF
has a valid cost and scalable vectorization is feasible.
This is mostly a hidden flag for experimentation purposes.
---
.../Vectorize/LoopVectorizationLegality.h | 18 +++++-
.../Vectorize/LoopVectorizationLegality.cpp | 6 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 6 ++
.../force-scalable-vectorization-always.ll | 59 +++++++++++++++++++
4 files changed, 85 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index f82fc588639dd..791bbbf822d63 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -118,7 +118,11 @@ class LoopVectorizeHints {
/// Vectorize loops using scalable vectors or fixed-width vectors, but favor
/// scalable vectors when the cost-model is inconclusive. This is the
/// default when the scalable.enable hint is enabled through a pragma.
- SK_PreferScalable = 1
+ SK_PreferScalable = 1,
+ /// Always vectorize loops using scalable vectors if feasible (i.e. the plan
+ /// has a valid cost and is not restricted by fixed-length dependence
+ /// distances).
+ SK_AlwaysScalable = 2
};
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
@@ -135,8 +139,10 @@ class LoopVectorizeHints {
void emitRemarkWithHints() const;
ElementCount getWidth() const {
- return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value ==
- SK_PreferScalable);
+ return ElementCount::get(
+ Width.Value,
+ (ScalableForceKind)Scalable.Value == SK_PreferScalable ||
+ (ScalableForceKind)Scalable.Value == SK_AlwaysScalable);
}
unsigned getInterleave() const {
@@ -162,6 +168,12 @@ class LoopVectorizeHints {
return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
}
+ /// \return true if scalable vectorization is always preferred over
+ /// fixed-length when feasible, regardless of cost.
+ bool isScalableVectorizationAlwaysPreferred() const {
+ return (ScalableForceKind)Scalable.Value == SK_AlwaysScalable;
+ }
+
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
const char *vectorizeAnalysisPassName() const;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index e57e0cf636501..b2cc78edd24d1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -78,7 +78,11 @@ static cl::opt<LoopVectorizeHints::ScalableForceKind>
clEnumValN(
LoopVectorizeHints::SK_PreferScalable, "on",
"Scalable vectorization is available and favored when the "
- "cost is inconclusive.")));
+ "cost is inconclusive."),
+ clEnumValN(
+ LoopVectorizeHints::SK_AlwaysScalable, "always",
+ "Scalable vectorization is available and always favored when "
+ "feasible")));
static cl::opt<bool> EnableHistogramVectorization(
"enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 69d2b9f2c1a28..47de1c7da3464 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3943,6 +3943,12 @@ bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
EstimatedWidthB *= *VScale;
}
+ // When there is a hint to always prefer scalable vectors,
+ // honour that hint.
+ if (Hints.isScalableVectorizationAlwaysPreferred())
+ if (A.Width.isScalable() && A.Cost.isValid() && !B.Width.isScalable())
+ return true;
+
// When optimizing for size choose whichever is smallest, which will be the
// one with the smallest cost for the whole loop. On a tie pick the larger
// vector width, on the assumption that throughput will be greater.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll
new file mode 100644
index 0000000000000..14beeeffbec51
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll
@@ -0,0 +1,59 @@
+; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -scalable-vectorization=always -debug-only=loop-vectorize -disable-output %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target triple = "aarch64"
+
+; Check that the cost of a fixed-VF is lower than that of comparable scalable VFs,
+; but that a scalable VF is still chosen (due to the `-scalable-vectorization=always` flag)
+define i32 @cost_prefers_fixed_width_vf_but_force_scalable_vf(ptr noalias %dst, ptr noalias %src, i64 %n) "target-cpu"="neoverse-n2" {
+; CHECK: Checking a loop in 'cost_prefers_fixed_width_vf_but_force_scalable_vf'
+; CHECK: Cost for VF 2: 11 (Estimated cost per lane: 5.5)
+; CHECK: Cost for VF 4: 10 (Estimated cost per lane: 2.5)
+; CHECK: Cost for VF 8: 10 (Estimated cost per lane: 1.2)
+; CHECK: Cost for VF vscale x 1: Invalid (Estimated cost per lane: Invalid)
+; CHECK: Cost for VF vscale x 2: 8 (Estimated cost per lane: 4.0)
+; CHECK: Cost for VF vscale x 4: 10 (Estimated cost per lane: 2.5)
+; CHECK: VPlan 'Final VPlan for VF={vscale x 4},UF={2}' {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
+ %uniform.load = load i16, ptr %src, align 2
+ %ext = sext i16 %uniform.load to i32
+ %sum.next = add i32 %sum, %ext
+ %dst.gep = getelementptr i16, ptr %dst, i64 %iv
+ store i16 %uniform.load, ptr %dst.gep, align 2
+ %iv.next = add i64 %iv, -1
+ %cmp = icmp ugt i64 %iv, 0
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum.next
+}
+
+; Test that with '-scalable-vectorization=always', we still fall back to NEON
+; if we can't vectorize with SVE (in this case, because SVE is unavailable)
+define i32 @no_sve_fallback_to_neon(ptr %src, i64 %n) "target-features"="+neon" {
+; CHECK: Checking a loop in 'no_sve_fallback_to_neon'
+; CHECK: Cost for VF 2: 4 (Estimated cost per lane: 2.0)
+; CHECK: Cost for VF 4: 4 (Estimated cost per lane: 1.0)
+; CHECK-NOT: Cost for VF
+; CHECK: VPlan 'Final VPlan for VF={2,4},UF={2}' {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
+ %src.gep = getelementptr i32, ptr %src, i64 %iv
+ %load = load i32, ptr %src.gep, align 4
+ %sum.next = add i32 %sum, %load
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv, %n
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum.next
+}
More information about the llvm-commits mailing list