[llvm] [LV] Add flag to always force a scalable VF when feasible. (PR #182467)

Fri Feb 20 02:16:55 PST 2026

https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/182467

`-scalable-vectorization=always` makes the loop vectorizer (LV) always favour a scalable VF over a fixed-width VF whenever scalable vectorization is feasible and the scalable VF has a valid cost.

This is mostly a hidden flag for experimentation purposes.

From 79d8165bbeb38fe8dca2de2c8913e1c43d9c4b54 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 19 Feb 2026 17:19:21 +0000
Subject: [PATCH] [LV] Add flag to always force a scalable VF when feasible.

Compiling with `-scalable-vectorization=always` makes the loop vectorizer
always favour a scalable VF when its cost is valid and scalable vectors
are feasible.

This is mostly a hidden flag for experimentation purposes.
---
 .../Vectorize/LoopVectorizationLegality.h     | 18 +++++-
 .../Vectorize/LoopVectorizationLegality.cpp   |  6 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    |  6 ++
 .../force-scalable-vectorization-always.ll    | 59 +++++++++++++++++++
 4 files changed, 85 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index f82fc588639dd..791bbbf822d63 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -118,7 +118,11 @@ class LoopVectorizeHints {
     /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
     /// scalable vectors when the cost-model is inconclusive. This is the
     /// default when the scalable.enable hint is enabled through a pragma.
-    SK_PreferScalable = 1
+    SK_PreferScalable = 1,
+    /// Always vectorize loops using scalable vectors if feasible (i.e. the plan
+    /// has a valid cost and is not restricted by fixed-length dependence
+    /// distances).
+    SK_AlwaysScalable = 2
   };
 
   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
@@ -135,8 +139,10 @@ class LoopVectorizeHints {
   void emitRemarkWithHints() const;
 
   ElementCount getWidth() const {
-    return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value ==
-                                              SK_PreferScalable);
+    return ElementCount::get(
+        Width.Value,
+        (ScalableForceKind)Scalable.Value == SK_PreferScalable ||
+            (ScalableForceKind)Scalable.Value == SK_AlwaysScalable);
   }
 
   unsigned getInterleave() const {
@@ -162,6 +168,12 @@ class LoopVectorizeHints {
     return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
   }
 
+  /// \return true if a scalable VF with a valid cost should always be chosen
+  /// over a fixed-width VF, even when the fixed-width VF is cheaper.
+  bool isScalableVectorizationAlwaysPreferred() const {
+    return (ScalableForceKind)Scalable.Value == SK_AlwaysScalable;
+  }
+
   /// If hints are provided that force vectorization, use the AlwaysPrint
   /// pass name to force the frontend to print the diagnostic.
   const char *vectorizeAnalysisPassName() const;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index e57e0cf636501..b2cc78edd24d1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -78,7 +78,11 @@ static cl::opt<LoopVectorizeHints::ScalableForceKind>
             clEnumValN(
                 LoopVectorizeHints::SK_PreferScalable, "on",
                 "Scalable vectorization is available and favored when the "
-                "cost is inconclusive.")));
+                "cost is inconclusive."),
+            clEnumValN(
+                LoopVectorizeHints::SK_AlwaysScalable, "always",
+                "Scalable vectorization is available and always favored when "
+                "feasible")));
 
 static cl::opt<bool> EnableHistogramVectorization(
     "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 69d2b9f2c1a28..47de1c7da3464 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3943,6 +3943,12 @@ bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
       EstimatedWidthB *= *VScale;
   }
 
+  // When there is a hint to always prefer scalable vectors, a scalable A with
+  // a valid cost beats any fixed-width B, whatever their relative costs.
+  if (Hints.isScalableVectorizationAlwaysPreferred())
+    if (A.Width.isScalable() && A.Cost.isValid() && !B.Width.isScalable())
+      return true;
+
   // When optimizing for size choose whichever is smallest, which will be the
   // one with the smallest cost for the whole loop. On a tie pick the larger
   // vector width, on the assumption that throughput will be greater.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll
new file mode 100644
index 0000000000000..14beeeffbec51
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-scalable-vectorization-always.ll
@@ -0,0 +1,59 @@
+; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -scalable-vectorization=always -debug-only=loop-vectorize -disable-output %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target triple = "aarch64"
+
+; Check that the cost model rates a fixed-width VF as cheaper than the comparable
+; scalable VFs, but that a scalable VF is still chosen (due to `-scalable-vectorization=always`).
+define i32 @cost_prefers_fixed_width_vf_but_force_scalable_vf(ptr noalias %dst, ptr noalias %src, i64 %n) "target-cpu"="neoverse-n2" {
+; CHECK: Checking a loop in 'cost_prefers_fixed_width_vf_but_force_scalable_vf'
+; CHECK: Cost for VF 2: 11 (Estimated cost per lane: 5.5)
+; CHECK: Cost for VF 4: 10 (Estimated cost per lane: 2.5)
+; CHECK: Cost for VF 8: 10 (Estimated cost per lane: 1.2)
+; CHECK: Cost for VF vscale x 1: Invalid (Estimated cost per lane: Invalid)
+; CHECK: Cost for VF vscale x 2: 8 (Estimated cost per lane: 4.0)
+; CHECK: Cost for VF vscale x 4: 10 (Estimated cost per lane: 2.5)
+; CHECK: VPlan 'Final VPlan for VF={vscale x 4},UF={2}' {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
+  %uniform.load = load i16, ptr %src, align 2
+  %ext = sext i16 %uniform.load to i32
+  %sum.next = add i32 %sum, %ext
+  %dst.gep = getelementptr i16, ptr %dst, i64 %iv
+  store i16 %uniform.load, ptr %dst.gep, align 2
+  %iv.next = add i64 %iv, -1
+  %cmp = icmp ugt i64 %iv, 0
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum.next
+}
+
+; Test that with '-scalable-vectorization=always', we still fall back to NEON
+; if we can't vectorize with SVE (in this case, because SVE is unavailable)
+define i32 @no_sve_fallback_to_neon(ptr %src, i64 %n) "target-features"="+neon" {
+; CHECK: Checking a loop in 'no_sve_fallback_to_neon'
+; CHECK: Cost for VF 2: 4 (Estimated cost per lane: 2.0)
+; CHECK: Cost for VF 4: 4 (Estimated cost per lane: 1.0)
+; CHECK-NOT: Cost for VF
+; CHECK: VPlan 'Final VPlan for VF={2,4},UF={2}' {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
+  %src.gep = getelementptr i32, ptr %src, i64 %iv
+  %load = load i32, ptr %src.gep, align 4
+  %sum.next = add i32 %sum, %load
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv, %n
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum.next
+}