[llvm] [LV] Add a statistic for early exit vectorization (PR #145730)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 26 10:38:14 PDT 2025


https://github.com/annamthomas updated https://github.com/llvm/llvm-project/pull/145730

>From 0fc357fa60d8c4b1d36fd74693a0fe5f1b70cd94 Mon Sep 17 00:00:00 2001
From: Anna Thomas <anna at azul.com>
Date: Wed, 25 Jun 2025 11:44:56 -0400
Subject: [PATCH] [LV] Add a statistic for early exit vectorization

Epilogue loops with early exits are not currently vectorized, but the
statistics are updated on the epilogue path as well for completeness.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 19 +++++---
 .../Transforms/LoopVectorize/vect.stats.ll    | 47 ++++++++++++++++---
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5eda2003920e6..19cc85a3793f1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -175,6 +175,7 @@ const char LLVMLoopVectorizeFollowupEpilogue[] =
 STATISTIC(LoopsVectorized, "Number of loops vectorized");
 STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
 STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized");
+STATISTIC(LoopsEarlyExitVectorized, "Number of early exit loops vectorized");
 
 static cl::opt<bool> EnableEpilogueVectorization(
     "enable-epilogue-vectorization", cl::init(true), cl::Hidden,
@@ -7324,6 +7325,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
          "Trying to execute plan with unsupported VF");
   assert(BestVPlan.hasUF(BestUF) &&
          "Trying to execute plan with unsupported UF");
+  ++LoopsVectorized;
+  if (BestVPlan.hasEarlyExit())
+    ++LoopsEarlyExitVectorized;
+  if (VectorizingEpilogue)
+    ++LoopsEpilogueVectorized;
   // TODO: Move to VPlan transform stage once the transition to the VPlan-based
   // cost model is complete for better cost estimates.
   VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
@@ -10259,7 +10265,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
           L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
           ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
 
-      LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
+      LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT,
+                      /*VectorizingEpilogue*/ false);
 
       ORE->emit([&]() {
         return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10288,9 +10295,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
                                            EPI, &CM, BFI, PSI, Checks,
                                            *BestMainPlan);
-        auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
-                                             *BestMainPlan, MainILV, DT, false);
-        ++LoopsVectorized;
+        auto ExpandedSCEVs =
+            LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan,
+                            MainILV, DT, /*VectorizingEpilogue*/ false);
 
         // Second pass vectorizes the epilogue and adjusts the control flow
         // edges from the first pass.
@@ -10303,7 +10310,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
 
         LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
-                        DT, true);
+                        DT, /*VectorizingEpilogue*/ true);
 
         // Fix induction resume values from the additional bypass block.
         BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock();
@@ -10318,7 +10325,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
           // TODO: Directly add as extra operand to the VPResumePHI recipe.
           Inc->setIncomingValueForBlock(BypassBlock, V);
         }
-        ++LoopsEpilogueVectorized;
 
         if (!Checks.hasChecks())
           DisableRuntimeUnroll = true;
@@ -10327,7 +10333,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                                VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
                                Checks, BestPlan);
         LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
-        ++LoopsVectorized;
 
         // Add metadata to disable runtime unrolling a scalar loop when there
         // are no runtime checks about strides and memory. A scalar loop that is
diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
index 9a55dc99c316b..b9b2cf757c0f7 100644
--- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
@@ -1,12 +1,12 @@
-; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization=1 --disable-output -stats -S 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
-;
-; We have 2 loops, one of them is vectorizable and the second one is not.
-;
+; We have 3 loops: two of them are vectorizable (one of which is early-exit
+; vectorized) and the third one is not.
 
-; CHECK: 2 loop-vectorize               - Number of loops analyzed for vectorization
-; CHECK: 1 loop-vectorize               - Number of loops vectorized
+; CHECK: 3 loop-vectorize               - Number of loops analyzed for vectorization
+; CHECK: 1 loop-vectorize               - Number of early exit loops vectorized
+; CHECK: 2 loop-vectorize               - Number of loops vectorized
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -31,6 +31,39 @@ for.end:                                          ; preds = %entry, %for.body
   ret void
 }
 
+define i32 @early_exit_vectorized(i32 %end) {
+entry:
+  %p1 = alloca [1024 x i32]
+  %p2 = alloca [1024 x i32]
+  call void @init_mem(ptr %p1, i64 1024)
+  call void @init_mem(ptr %p2, i64 1024)
+  %end.clamped = and i32 %end, 1023
+  br label %for.body
+
+for.body:
+  %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+  %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+  %0 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+  %1 = load i32, ptr %arrayidx2, align 4
+  %cmp.early = icmp eq i32 %0, %1
+  br i1 %cmp.early, label %found, label %for.inc
+
+for.inc:
+  %ind.next = add i8 %ind, 1
+  %conv = zext i8 %ind.next to i32
+  %gep.ind.next = add i64 %gep.ind, 1
+  %cmp = icmp ult i32 %conv, %end.clamped
+  br i1 %cmp, label %for.body, label %exit
+
+found:
+  ret i32 1
+
+exit:
+  ret i32 0
+}
+
 define void @not_vectorized(ptr nocapture %a, i64 %size) {
 entry:
   %cmp1 = icmp sle i64 %size, 0
@@ -56,3 +89,5 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %entry, %for.body
   ret void
 }
+
+declare void @init_mem(ptr, i64);



More information about the llvm-commits mailing list