[llvm] [LV] Add a statistic for early exit vectorization (PR #145730)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 10:38:14 PDT 2025
https://github.com/annamthomas updated https://github.com/llvm/llvm-project/pull/145730
>From 0fc357fa60d8c4b1d36fd74693a0fe5f1b70cd94 Mon Sep 17 00:00:00 2001
From: Anna Thomas <anna at azul.com>
Date: Wed, 25 Jun 2025 11:44:56 -0400
Subject: [PATCH] [LV] Add a statistic for early exit vectorization
We currently do not vectorize epilogue loops with early exits, but the
statistic is updated on that path as well for completeness.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 19 +++++---
.../Transforms/LoopVectorize/vect.stats.ll | 47 ++++++++++++++++---
2 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5eda2003920e6..19cc85a3793f1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -175,6 +175,7 @@ const char LLVMLoopVectorizeFollowupEpilogue[] =
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized");
+STATISTIC(LoopsEarlyExitVectorized, "Number of early exit loops vectorized");
static cl::opt<bool> EnableEpilogueVectorization(
"enable-epilogue-vectorization", cl::init(true), cl::Hidden,
@@ -7324,6 +7325,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
"Trying to execute plan with unsupported VF");
assert(BestVPlan.hasUF(BestUF) &&
"Trying to execute plan with unsupported UF");
+ ++LoopsVectorized;
+ if (BestVPlan.hasEarlyExit())
+ ++LoopsEarlyExitVectorized;
+ if (VectorizingEpilogue)
+ ++LoopsEpilogueVectorized;
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
@@ -10259,7 +10265,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
- LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT,
+ /*VectorizingEpilogue*/ false);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10288,9 +10295,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &CM, BFI, PSI, Checks,
*BestMainPlan);
- auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
- *BestMainPlan, MainILV, DT, false);
- ++LoopsVectorized;
+ auto ExpandedSCEVs =
+ LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan,
+ MainILV, DT, /*VectorizingEpilogue*/ false);
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
@@ -10303,7 +10310,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
- DT, true);
+ DT, /*VectorizingEpilogue*/ true);
// Fix induction resume values from the additional bypass block.
BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock();
@@ -10318,7 +10325,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// TODO: Directly add as extra operand to the VPResumePHI recipe.
Inc->setIncomingValueForBlock(BypassBlock, V);
}
- ++LoopsEpilogueVectorized;
if (!Checks.hasChecks())
DisableRuntimeUnroll = true;
@@ -10327,7 +10333,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
Checks, BestPlan);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
- ++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
// are no runtime checks about strides and memory. A scalar loop that is
diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
index 9a55dc99c316b..b9b2cf757c0f7 100644
--- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
@@ -1,12 +1,12 @@
-; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization=1 --disable-output -stats -S 2>&1 | FileCheck %s
; REQUIRES: asserts
-;
-; We have 2 loops, one of them is vectorizable and the second one is not.
-;
+; We have 3 loops: two of them are vectorizable (one of which is early-exit
+; vectorized) and the third one is not.
-; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
-; CHECK: 1 loop-vectorize - Number of loops vectorized
+; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization
+; CHECK: 1 loop-vectorize - Number of early exit loops vectorized
+; CHECK: 2 loop-vectorize - Number of loops vectorized
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -31,6 +31,39 @@ for.end: ; preds = %entry, %for.body
ret void
}
+define i32 @early_exit_vectorized(i32 %end) {
+entry:
+ %p1 = alloca [1024 x i32]
+ %p2 = alloca [1024 x i32]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ %end.clamped = and i32 %end, 1023
+ br label %for.body
+
+for.body:
+ %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+ %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+ %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+ %0 = load i32, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+ %1 = load i32, ptr %arrayidx2, align 4
+ %cmp.early = icmp eq i32 %0, %1
+ br i1 %cmp.early, label %found, label %for.inc
+
+for.inc:
+ %ind.next = add i8 %ind, 1
+ %conv = zext i8 %ind.next to i32
+ %gep.ind.next = add i64 %gep.ind, 1
+ %cmp = icmp ult i32 %conv, %end.clamped
+ br i1 %cmp, label %for.body, label %exit
+
+found:
+ ret i32 1
+
+exit:
+ ret i32 0
+}
+
define void @not_vectorized(ptr nocapture %a, i64 %size) {
entry:
%cmp1 = icmp sle i64 %size, 0
@@ -56,3 +89,5 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %entry, %for.body
ret void
}
+
+declare void @init_mem(ptr, i64);
More information about the llvm-commits
mailing list