[llvm] [VPlan] Fix LastActiveLane assertion on scalar VF (PR #167897)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 00:53:22 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/167897
>From 1e5f44eb7ee71c285bf803106307a05cdd2b4b48 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 14 Nov 2025 00:06:41 +0800
Subject: [PATCH 1/6] [VPlan] Fix LastActiveLane assertion on scalar VF
For a scalar-only VPlan with tail folding that has a live-out phi, legalizeAndOptimizeInductions will scalarize the widened canonical IV feeding into the header mask:
<x1> vector loop: {
  vector.body:
    EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
    vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0>
    EMIT vp<%6> = icmp ule vp<%5>, vp<%3>
    EMIT vp<%index.next> = add nuw vp<%4>, vp<%1>
    EMIT branch-on-count vp<%index.next>, vp<%2>
  No successors
}
Successor(s): middle.block

middle.block:
  EMIT vp<%8> = last-active-lane vp<%6>
  EMIT vp<%9> = extract-lane vp<%8>, vp<%5>
Successor(s): ir-bb<exit>
The verifier complains about this, but the correct last active lane is still computed, so fix the assertion by handling this case in isHeaderMask. A similar pattern already exists there for ActiveLaneMask, which also accepts a VPScalarIVSteps recipe.
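The added check, roughly (a sketch distilled from the diff below, assuming the VPlanPatternMatch helpers already in scope in VPlanUtils.cpp):

  // A scalar header mask is an icmp of the canonical IV's scalar
  // steps (step 1, scaled by VF) against the backedge-taken count.
  if (match(V,
            m_ICmp(m_ScalarIVSteps(
                       m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                       m_One(), m_Specific(&Plan.getVF())),
                   m_Specific(Plan.getBackedgeTakenCount()))))
    return true;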
Fixes #167813
---
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 7 +++
.../tail-folding-live-out-scalar-vf.ll | 60 +++++++++++++++++++
2 files changed, 67 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index e22c5dfdb9f38..c9de9b82bca7c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -66,6 +66,13 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
m_One(), m_Specific(&Plan.getVF()))) ||
IsWideCanonicalIV(A));
+ if (match(V,
+ m_ICmp(m_ScalarIVSteps(
+ m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+ m_One(), m_Specific(&Plan.getVF())),
+ m_Specific(Plan.getBackedgeTakenCount()))))
+ return true;
+
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
B == Plan.getBackedgeTakenCount();
}
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
new file mode 100644
index 0000000000000..5964cf45fb6be
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s
+
+define i64 @live_out_scalar_vf(i64 %n) {
+; CHECK-LABEL: define i64 @live_out_scalar_vf(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP1]], [[TRIP_COUNT_MINUS_1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], [[TRIP_COUNT_MINUS_1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i1 [[TMP4]], false
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 1, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i1 [[TMP3]], false
+; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 [[TMP8]]
+; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP13]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[LAST_ACTIVE_LANE]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[LAST_ACTIVE_LANE]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP2]], i64 [[TMP1]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i64 [[TMP16]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ br label %latch
+
+latch:
+ ; Need to use a phi otherwise the header mask will use a
+ ; VPWidenCanonicalIVRecipe instead of a VPScalarIVStepsRecipe.
+ %exitval = phi i64 [ %iv, %loop ]
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i64 %exitval
+}
+
>From 3863112c91c94b8da52037124676882c44d26e2a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 14 Nov 2025 00:39:22 +0800
Subject: [PATCH 2/6] Remove trivial branch
---
.../tail-folding-live-out-scalar-vf.ll | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
index 5964cf45fb6be..21b26c1ddec3d 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
@@ -15,8 +15,9 @@ define i64 @live_out_scalar_vf(i64 %n) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP1]], [[TRIP_COUNT_MINUS_1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], [[TRIP_COUNT_MINUS_1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
@@ -33,23 +34,23 @@ define i64 @live_out_scalar_vf(i64 %n) {
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 [[TMP8]]
; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP13]], 1
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[LAST_ACTIVE_LANE]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[LAST_ACTIVE_LANE]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP14]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[TMP14]], 1
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP2]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[LAST_ACTIVE_LANE]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[VECTOR_RECUR]], i64 [[TMP16]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret i64 [[TMP16]]
+; CHECK-NEXT: ret i64 [[TMP19]]
;
entry:
br label %loop
loop:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
- br label %latch
-
-latch:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Need to use a phi otherwise the header mask will use a
; VPWidenCanonicalIVRecipe instead of a VPScalarIVStepsRecipe.
- %exitval = phi i64 [ %iv, %loop ]
+ %exitval = phi i64 [ 0, %entry ], [ %iv, %loop ]
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, %n
br i1 %ec, label %exit, label %loop
>From 23bce03a6741a1ae1a597c678fd7dff6df378fec Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 14 Nov 2025 14:48:15 +0800
Subject: [PATCH 3/6] Add comment and assert
---
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index c9de9b82bca7c..476b51d4d0951 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -66,12 +66,16 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
m_One(), m_Specific(&Plan.getVF()))) ||
IsWideCanonicalIV(A));
+ // For scalar plans, the header mask uses the scalar steps.
if (match(V,
m_ICmp(m_ScalarIVSteps(
m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
m_One(), m_Specific(&Plan.getVF())),
- m_Specific(Plan.getBackedgeTakenCount()))))
+ m_Specific(Plan.getBackedgeTakenCount())))) {
+ assert(Plan.hasScalarVFOnly() &&
+ "Non-scalar VF using scalar IV steps for header mask?");
return true;
+ }
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
B == Plan.getBackedgeTakenCount();
>From 67bb40220ac5c4046755a0edf7da481ff4ca4bf4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 14 Nov 2025 14:59:23 +0800
Subject: [PATCH 4/6] Update test after merging
---
.../tail-folding-live-out-scalar-vf.ll | 48 ++++++++-----------
1 file changed, 20 insertions(+), 28 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
index 21b26c1ddec3d..ea1383946102e 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-live-out-scalar-vf.ll
@@ -4,43 +4,35 @@
define i64 @live_out_scalar_vf(i64 %n) {
; CHECK-LABEL: define i64 @live_out_scalar_vf(
; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP2]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP1]], [[TRIP_COUNT_MINUS_1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], [[TRIP_COUNT_MINUS_1]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i1 [[TMP4]], false
-; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 1, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i1 [[TMP3]], false
-; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i64
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 [[TMP8]]
-; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP13]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[LAST_ACTIVE_LANE]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP14]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[TMP14]], 1
-; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP2]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[LAST_ACTIVE_LANE]], 0
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[VECTOR_RECUR]], i64 [[TMP16]]
-; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[EXITVAL:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[TMP19:%.*]] = phi i64 [ [[EXITVAL]], %[[LOOP]] ], [ [[INDEX]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[TMP19]]
;
entry:
>From efca5e5a1abe71378f0864896a59ab62c2673bb7 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 17 Nov 2025 11:48:18 +0800
Subject: [PATCH 5/6] Add m_CanonicalScalarIVSteps helper
---
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 976b094fd4ddb..084e1bc12d5b9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -46,6 +46,12 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
return Expanded;
}
+static inline auto m_CanonicalScalarIVSteps(const VPlan &Plan) {
+ return m_ScalarIVSteps(
+ m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()), m_One(),
+ m_Specific(&Plan.getVF()));
+}
+
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (isa<VPActiveLaneMaskPHIRecipe>(V))
return true;
@@ -60,18 +66,11 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
return B == Plan.getTripCount() &&
- (match(A,
- m_ScalarIVSteps(
- m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
- m_One(), m_Specific(&Plan.getVF()))) ||
- IsWideCanonicalIV(A));
+ (match(A, m_CanonicalScalarIVSteps(Plan)) || IsWideCanonicalIV(A));
// For scalar plans, the header mask uses the scalar steps.
- if (match(V,
- m_ICmp(m_ScalarIVSteps(
- m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
- m_One(), m_Specific(&Plan.getVF())),
- m_Specific(Plan.getBackedgeTakenCount())))) {
+ if (match(V, m_ICmp(m_CanonicalScalarIVSteps(Plan),
+ m_Specific(Plan.getBackedgeTakenCount())))) {
assert(Plan.hasScalarVFOnly() &&
"Non-scalar VF using scalar IV steps for header mask?");
return true;
>From c0a70672c2a9f64b4efcc71ee786eb2234f9261e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 17 Nov 2025 16:53:03 +0800
Subject: [PATCH 6/6] Move to variable
---
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 084e1bc12d5b9..3bc2dfd623777 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -46,12 +46,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
return Expanded;
}
-static inline auto m_CanonicalScalarIVSteps(const VPlan &Plan) {
- return m_ScalarIVSteps(
- m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()), m_One(),
- m_Specific(&Plan.getVF()));
-}
-
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (isa<VPActiveLaneMaskPHIRecipe>(V))
return true;
@@ -64,12 +58,16 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
VPValue *A, *B;
+ auto m_CanonicalScalarIVSteps =
+ m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+ m_One(), m_Specific(&Plan.getVF()));
+
if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
return B == Plan.getTripCount() &&
- (match(A, m_CanonicalScalarIVSteps(Plan)) || IsWideCanonicalIV(A));
+ (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));
// For scalar plans, the header mask uses the scalar steps.
- if (match(V, m_ICmp(m_CanonicalScalarIVSteps(Plan),
+ if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
m_Specific(Plan.getBackedgeTakenCount())))) {
assert(Plan.hasScalarVFOnly() &&
"Non-scalar VF using scalar IV steps for header mask?");