[llvm] [VPlan] Add hidden `-vplan-print-after-all` option (PR #175839)

Andrei Elovikov via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 27 14:46:31 PST 2026


https://github.com/eas updated https://github.com/llvm/llvm-project/pull/175839

>From a50cbf408691601b062a4b82c53310d15ea5d6de Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Tue, 27 Jan 2026 08:35:51 -0800
Subject: [PATCH 1/5] Limit to existing runPass lines only, use macro to get
 the name

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 68 ++++++++---------
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |  8 ++
 .../Vectorize/VPlanConstruction.cpp           |  5 ++
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 34 ++++-----
 .../Transforms/Vectorize/VPlanTransforms.h    | 49 +++++++-----
 .../LoopVectorize/vplan-print-after-all.ll    | 76 +++++++++++++++++++
 .../Vectorize/VPlanUncountableExitTest.cpp    |  4 +-
 7 files changed, 172 insertions(+), 72 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3cf5e5fb3f818..9016c4aa984e5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -366,6 +366,10 @@ cl::opt<bool>
                           cl::Hidden,
                           cl::desc("Verfiy VPlans after VPlan transforms."));
 
+cl::opt<bool> llvm::PrintAfterEachVPlanPass(
+    "vplan-print-after-all", cl::init(false), cl::Hidden,
+    cl::desc("Print after each VPlanTransforms::runPass."));
+
 // This flag enables the stress testing of the VPlan H-CFG construction in the
 // VPlan-native vectorization path. It must be used in conjuction with
 // -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -7380,17 +7384,16 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
     ++LoopsEarlyExitVectorized;
   // TODO: Move to VPlan transform stage once the transition to the VPlan-based
   // cost model is complete for better cost estimates.
-  VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
-  VPlanTransforms::runPass(VPlanTransforms::materializePacksAndUnpacks,
-                           BestVPlan);
-  VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
-  VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
+  RUN_VPLAN_PASS(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
+  RUN_VPLAN_PASS(VPlanTransforms::materializePacksAndUnpacks, BestVPlan);
+  RUN_VPLAN_PASS(VPlanTransforms::materializeBroadcasts, BestVPlan);
+  RUN_VPLAN_PASS(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
   bool HasBranchWeights =
       hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
   if (HasBranchWeights) {
     std::optional<unsigned> VScale = CM.getVScaleForTuning();
-    VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
-                             BestVPlan, BestVF, VScale);
+    RUN_VPLAN_PASS(VPlanTransforms::addBranchWeightToMiddleTerminator,
+                   BestVPlan, BestVF, VScale);
   }
 
   // Checks are the same for all VPlans, added to BestVPlan only for
@@ -8349,14 +8352,14 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       // Now optimize the initial VPlan.
       VPlanTransforms::hoistPredicatedLoads(*Plan, PSE, OrigLoop);
       VPlanTransforms::sinkPredicatedStores(*Plan, PSE, OrigLoop);
-      VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
-                               *Plan, CM.getMinimalBitwidths());
-      VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+      RUN_VPLAN_PASS(VPlanTransforms::truncateToMinimalBitwidths, *Plan,
+                     CM.getMinimalBitwidths());
+      RUN_VPLAN_PASS(VPlanTransforms::optimize, *Plan);
       // TODO: try to put addExplicitVectorLength close to addActiveLaneMask
       if (CM.foldTailWithEVL()) {
-        VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
-                                 *Plan, CM.getMaxSafeElements());
-        VPlanTransforms::runPass(VPlanTransforms::optimizeEVLMasks, *Plan);
+        RUN_VPLAN_PASS(VPlanTransforms::addExplicitVectorLength, *Plan,
+                       CM.getMaxSafeElements());
+        RUN_VPLAN_PASS(VPlanTransforms::optimizeEVLMasks, *Plan);
       }
       assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
       VPlans.push_back(std::move(Plan));
@@ -8563,18 +8566,15 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
 
   // Apply mandatory transformation to handle reductions with multiple in-loop
   // uses if possible, bail out otherwise.
-  if (!VPlanTransforms::runPass(VPlanTransforms::handleMultiUseReductions,
-                                *Plan))
+  if (!RUN_VPLAN_PASS(VPlanTransforms::handleMultiUseReductions, *Plan))
     return nullptr;
   // Apply mandatory transformation to handle FP maxnum/minnum reduction with
   // NaNs if possible, bail out otherwise.
-  if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
-                                *Plan))
+  if (!RUN_VPLAN_PASS(VPlanTransforms::handleMaxMinNumReductions, *Plan))
     return nullptr;
 
   // Create whole-vector selects for find-last recurrences.
-  if (!VPlanTransforms::runPass(VPlanTransforms::handleFindLastReductions,
-                                *Plan))
+  if (!RUN_VPLAN_PASS(VPlanTransforms::handleFindLastReductions, *Plan))
     return nullptr;
 
   // Transform recipes to abstract recipes if it is legal and beneficial and
@@ -8584,8 +8584,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   if (!CM.foldTailWithEVL()) {
     VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, CM.PSE,
                           OrigLoop);
-    VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
-                             CostCtx, Range);
+    RUN_VPLAN_PASS(VPlanTransforms::convertToAbstractRecipes, *Plan, CostCtx,
+                   Range);
   }
 
   for (ElementCount VF : Range)
@@ -8595,24 +8595,23 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
-  VPlanTransforms::runPass(VPlanTransforms::createInterleaveGroups, *Plan,
-                           InterleaveGroups, RecipeBuilder,
-                           CM.isScalarEpilogueAllowed());
+  RUN_VPLAN_PASS(VPlanTransforms::createInterleaveGroups, *Plan,
+                 InterleaveGroups, RecipeBuilder, CM.isScalarEpilogueAllowed());
 
   // Replace VPValues for known constant strides.
-  VPlanTransforms::runPass(VPlanTransforms::replaceSymbolicStrides, *Plan, PSE,
-                           Legal->getLAI()->getSymbolicStrides());
+  RUN_VPLAN_PASS(VPlanTransforms::replaceSymbolicStrides, *Plan, PSE,
+                 Legal->getLAI()->getSymbolicStrides());
 
   auto BlockNeedsPredication = [this](BasicBlock *BB) {
     return Legal->blockNeedsPredication(BB);
   };
-  VPlanTransforms::runPass(VPlanTransforms::dropPoisonGeneratingRecipes, *Plan,
-                           BlockNeedsPredication);
+  RUN_VPLAN_PASS(VPlanTransforms::dropPoisonGeneratingRecipes, *Plan,
+                 BlockNeedsPredication);
 
   // Sink users of fixed-order recurrence past the recipe defining the previous
   // value and introduce FirstOrderRecurrenceSplice VPInstructions.
-  if (!VPlanTransforms::runPass(VPlanTransforms::adjustFixedOrderRecurrences,
-                                *Plan, Builder))
+  if (!RUN_VPLAN_PASS(VPlanTransforms::adjustFixedOrderRecurrences, *Plan,
+                      Builder))
     return nullptr;
 
   if (useActiveLaneMask(Style)) {
@@ -8895,7 +8894,7 @@ void LoopVectorizationPlanner::addReductionResultComputation(
   for (VPRecipeBase *R : ToDelete)
     R->eraseFromParent();
 
-  VPlanTransforms::runPass(VPlanTransforms::clearReductionWrapFlags, *Plan);
+  RUN_VPLAN_PASS(VPlanTransforms::clearReductionWrapFlags, *Plan);
 }
 
 void LoopVectorizationPlanner::attachRuntimeChecks(
@@ -9283,7 +9282,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
     VPIRInst->eraseFromParent();
     ResumePhi->eraseFromParent();
   }
-  VPlanTransforms::runPass(VPlanTransforms::removeDeadRecipes, MainPlan);
+  RUN_VPLAN_PASS(VPlanTransforms::removeDeadRecipes, MainPlan);
 
   using namespace VPlanPatternMatch;
   // When vectorizing the epilogue, FindFirstIV & FindLastIV reductions can
@@ -10120,9 +10119,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                            BestPlan);
     // TODO: Move to general VPlan pipeline once epilogue loops are also
     // supported.
-    VPlanTransforms::runPass(
-        VPlanTransforms::materializeConstantVectorTripCount, BestPlan, VF.Width,
-        IC, PSE);
+    RUN_VPLAN_PASS(VPlanTransforms::materializeConstantVectorTripCount,
+                   BestPlan, VF.Width, IC, PSE);
     LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
                                  VF.MinProfitableTripCount);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index d1aa1b35ac632..06014ce8afa59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -912,6 +912,9 @@ bool VPlan::isExitBlock(VPBlockBase *VPBB) {
   return is_contained(ExitBlocks, VPBB);
 }
 
+/// To make RUN_VPLAN_PASS print final VPlan:
+static void printFinalVPlan(VPlan &) {}
+
 /// Generate the code inside the preheader and body of the vectorized loop.
 /// Assumes a single pre-header basic-block was created for this. Introduce
 /// additional basic-blocks as needed, and fill them all.
@@ -933,7 +936,12 @@ void VPlan::execute(VPTransformState *State) {
   LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
                     << ", UF=" << getUF() << '\n');
   setName("Final VPlan");
+  // TODO: -debug-only=loop-vectorize should probably just imply
+  // `-vplan-print-after-all`, but the latter is much more verbose than the
+  // former, so doing that would require updating existing tests and that hasn't
+  // been done (yet). For now, just do both:
   LLVM_DEBUG(dump());
+  RUN_VPLAN_PASS(printFinalVPlan, *this);
 
   BasicBlock *ScalarPh = State->CFG.ExitBB;
   VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4fc7b2fe1c2de..d40404b6a1e4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -586,6 +586,9 @@ static void simplifyLiveInsWithSCEV(VPlan &Plan,
   }
 }
 
+/// To make RUN_VPLAN_PASS print initial VPlan:
+static void printAfterInitialConstruction(VPlan &) {}
+
 std::unique_ptr<VPlan>
 VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
                              DebugLoc IVDL, PredicatedScalarEvolution &PSE,
@@ -594,6 +597,8 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
   std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
   addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
   simplifyLiveInsWithSCEV(*VPlan0, PSE);
+
+  RUN_VPLAN_PASS(printAfterInitialConstruction, *VPlan0);
   return VPlan0;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 90e32b9e1ce1d..4f5635d9341b2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2675,23 +2675,23 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
 }
 
 void VPlanTransforms::optimize(VPlan &Plan) {
-  runPass(removeRedundantCanonicalIVs, Plan);
-  runPass(removeRedundantInductionCasts, Plan);
-
-  runPass(simplifyRecipes, Plan);
-  runPass(removeDeadRecipes, Plan);
-  runPass(simplifyBlends, Plan);
-  runPass(legalizeAndOptimizeInductions, Plan);
-  runPass(narrowToSingleScalarRecipes, Plan);
-  runPass(removeRedundantExpandSCEVRecipes, Plan);
-  runPass(simplifyRecipes, Plan);
-  runPass(removeBranchOnConst, Plan);
-  runPass(removeDeadRecipes, Plan);
-
-  runPass(createAndOptimizeReplicateRegions, Plan);
-  runPass(hoistInvariantLoads, Plan);
-  runPass(mergeBlocksIntoPredecessors, Plan);
-  runPass(licm, Plan);
+  RUN_VPLAN_PASS(removeRedundantCanonicalIVs, Plan);
+  RUN_VPLAN_PASS(removeRedundantInductionCasts, Plan);
+
+  RUN_VPLAN_PASS(simplifyRecipes, Plan);
+  RUN_VPLAN_PASS(removeDeadRecipes, Plan);
+  RUN_VPLAN_PASS(simplifyBlends, Plan);
+  RUN_VPLAN_PASS(legalizeAndOptimizeInductions, Plan);
+  RUN_VPLAN_PASS(narrowToSingleScalarRecipes, Plan);
+  RUN_VPLAN_PASS(removeRedundantExpandSCEVRecipes, Plan);
+  RUN_VPLAN_PASS(simplifyRecipes, Plan);
+  RUN_VPLAN_PASS(removeBranchOnConst, Plan);
+  RUN_VPLAN_PASS(removeDeadRecipes, Plan);
+
+  RUN_VPLAN_PASS(createAndOptimizeReplicateRegions, Plan);
+  RUN_VPLAN_PASS(hoistInvariantLoads, Plan);
+  RUN_VPLAN_PASS(mergeBlocksIntoPredecessors, Plan);
+  RUN_VPLAN_PASS(licm, Plan);
 }
 
 // Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e0d09a099647a..15f03fb9d3b19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -33,28 +33,41 @@ class VPRecipeBuilder;
 struct VFRange;
 
 LLVM_ABI_FOR_TEST extern cl::opt<bool> VerifyEachVPlan;
+LLVM_ABI_FOR_TEST extern cl::opt<bool> PrintAfterEachVPlanPass;
 LLVM_ABI_FOR_TEST extern cl::opt<bool> EnableWideActiveLaneMask;
 
 struct VPlanTransforms {
-  /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra
-  /// arguments to the transform. Returns the boolean returned by the transform.
-  template <typename... ArgsTy>
-  static bool runPass(bool (*Transform)(VPlan &, ArgsTy...), VPlan &Plan,
-                      typename std::remove_reference<ArgsTy>::type &...Args) {
-    bool Res = Transform(Plan, Args...);
-    if (VerifyEachVPlan)
-      verifyVPlanIsValid(Plan);
-    return Res;
-  }
-  /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra
-  /// arguments to the transform.
-  template <typename... ArgsTy>
-  static void runPass(void (*Fn)(VPlan &, ArgsTy...), VPlan &Plan,
-                      typename std::remove_reference<ArgsTy>::type &...Args) {
-    Fn(Plan, Args...);
-    if (VerifyEachVPlan)
-      verifyVPlanIsValid(Plan);
+  /// Helper to run a VPlan pass \p Pass on \p VPlan, forwarding extra arguments
+  /// to the pass. Performs verification/printing after each VPlan pass if
+  /// requested via command line options.
+  template <typename PassTy, typename... ArgsTy>
+  static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
+                                ArgsTy &&...Args) {
+    auto PostTransformActions = [&]() {
+      // Make sure to print before verification, so that output is more useful
+      // in case of failures:
+      if (PrintAfterEachVPlanPass) {
+        dbgs() << "VPlan after " << PassName << '\n';
+        dbgs() << Plan << '\n';
+      }
+      if (VerifyEachVPlan)
+        verifyVPlanIsValid(Plan);
+    };
+
+    using ResTy = decltype(std::forward<PassTy>(Pass)(
+        Plan, std::forward<ArgsTy>(Args)...));
+    if constexpr (std::is_same_v<ResTy, void>) {
+      std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
+      PostTransformActions();
+    } else {
+      decltype(auto) Res =
+          std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
+      PostTransformActions();
+      return Res;
+    }
   }
+#define RUN_VPLAN_PASS(PASS, ...)                                              \
+  llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
 
   /// Create a base VPlan0, serving as the common starting point for all later
   /// candidates. It consists of an initial plain CFG loop with loop blocks from
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
new file mode 100644
index 0000000000000..34f576ed31da5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
@@ -0,0 +1,76 @@
+; REQUIRES: asserts
+
+; Verify that `-vplan-print-after-all` option works.
+
+; RUN: opt -passes=loop-vectorize -disable-output -vplan-print-after-all -force-vector-width=4 < %s 2>&1 | FileCheck %s --implicit-check-not "VPlan after"
+
+; CHECK: VPlan after printAfterInitialConstruction
+; CHECK: VPlan after VPlanTransforms::clearReductionWrapFlags
+; CHECK: VPlan after VPlanTransforms::handleMultiUseReductions
+; CHECK: VPlan after VPlanTransforms::handleMaxMinNumReductions
+; CHECK: VPlan after VPlanTransforms::handleFindLastReductions
+; CHECK: VPlan after VPlanTransforms::convertToAbstractRecipes
+; CHECK: VPlan after VPlanTransforms::createInterleaveGroups
+; CHECK: VPlan after VPlanTransforms::replaceSymbolicStrides
+; CHECK: VPlan after VPlanTransforms::dropPoisonGeneratingRecipes
+; CHECK: VPlan after VPlanTransforms::adjustFixedOrderRecurrences
+; CHECK: VPlan after VPlanTransforms::truncateToMinimalBitwidths
+; CHECK: VPlan after removeRedundantCanonicalIVs
+; CHECK: VPlan after removeRedundantInductionCasts
+; CHECK: VPlan after simplifyRecipes
+; CHECK: VPlan after removeDeadRecipes
+; CHECK: VPlan after simplifyBlends
+; CHECK: VPlan after legalizeAndOptimizeInductions
+; CHECK: VPlan after narrowToSingleScalarRecipes
+; CHECK: VPlan after removeRedundantExpandSCEVRecipes
+; CHECK: VPlan after simplifyRecipes
+; CHECK: VPlan after removeBranchOnConst
+; CHECK: VPlan after removeDeadRecipes
+; CHECK: VPlan after createAndOptimizeReplicateRegions
+; CHECK: VPlan after hoistInvariantLoads
+; CHECK: VPlan after mergeBlocksIntoPredecessors
+; CHECK: VPlan after licm
+; CHECK: VPlan after VPlanTransforms::optimize
+; CHECK: VPlan after VPlanTransforms::materializeConstantVectorTripCount
+; CHECK: VPlan after VPlanTransforms::unrollByUF
+; CHECK: VPlan after VPlanTransforms::materializePacksAndUnpacks
+; CHECK: VPlan after VPlanTransforms::materializeBroadcasts
+; CHECK: VPlan after VPlanTransforms::replicateByVF
+; CHECK: VPlan after printFinalVPlan
+; Also verify that VPlan is actually printed:
+; CHECK-NEXT: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%smax> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT:   IR   %smax = call i64 @llvm.smax.i64(i64 %n, i64 1)
+; CHECK-NEXT:   EMIT vp<%min.iters.check> = icmp ult ir<%smax>, ir<4>
+; CHECK-NEXT:   EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT:   EMIT vp<%n.mod.vf> = urem ir<%smax>, ir<4>
+; CHECK-NEXT:   EMIT vp<%n.vec> = sub ir<%smax>, vp<%n.mod.vf>
+; CHECK-NEXT:   EMIT vp<%3> = step-vector i64
+; CHECK-NEXT:   EMIT vp<%4> = broadcast ir<4>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT:   EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT:   WIDEN-PHI ir<%iv> = phi [ vp<%3>, vector.ph ], [ vp<%vec.ind.next>, vector.body ]
+; CHECK-NEXT:   CLONE ir<%gep> = getelementptr ir<%ptr>, vp<%index>
+
+define void @foo(ptr dereferenceable(1024) %ptr, i64 %n) {
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %gep = getelementptr i64, ptr %ptr, i64 %iv
+  store i64 %iv, ptr %gep
+  %iv.next = add nsw i64 %iv, 1
+  %exitcond = icmp slt i64 %iv.next, %n
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
index 270537a333b8e..f895a04bff786 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
@@ -48,7 +48,7 @@ TEST_F(VPUncountableExitTest, FindUncountableExitRecipes) {
   BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
   auto Plan = buildVPlan(LoopHeader, /*HasUncountableExit=*/true);
   VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(*Plan, *TLI);
-  VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+  RUN_VPLAN_PASS(VPlanTransforms::optimize, *Plan);
 
   SmallVector<VPRecipeBase *> Recipes;
   SmallVector<VPRecipeBase *> GEPs;
@@ -84,7 +84,7 @@ TEST_F(VPUncountableExitTest, NoUncountableExit) {
   BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
   auto Plan = buildVPlan(LoopHeader);
   VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(*Plan, *TLI);
-  VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+  RUN_VPLAN_PASS(VPlanTransforms::optimize, *Plan);
 
   SmallVector<VPRecipeBase *> Recipes;
   SmallVector<VPRecipeBase *> GEPs;

>From aea06bd607dd4e6691500a4e70c597eca65f1e6a Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Tue, 27 Jan 2026 09:41:52 -0800
Subject: [PATCH 2/5] Initial VPlan won't pass verifier

---
 llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp         | 2 +-
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h             | 6 ++++--
 llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index d40404b6a1e4e..379d9d3e15795 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -598,7 +598,7 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
   addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
   simplifyLiveInsWithSCEV(*VPlan0, PSE);
 
-  RUN_VPLAN_PASS(printAfterInitialConstruction, *VPlan0);
+  RUN_VPLAN_PASS_NO_VERIFY(printAfterInitialConstruction, *VPlan0);
   return VPlan0;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 15f03fb9d3b19..11544d95dc3c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -40,7 +40,7 @@ struct VPlanTransforms {
   /// Helper to run a VPlan pass \p Pass on \p VPlan, forwarding extra arguments
   /// to the pass. Performs verification/printing after each VPlan pass if
   /// requested via command line options.
-  template <typename PassTy, typename... ArgsTy>
+  template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
   static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
                                 ArgsTy &&...Args) {
     auto PostTransformActions = [&]() {
@@ -50,7 +50,7 @@ struct VPlanTransforms {
         dbgs() << "VPlan after " << PassName << '\n';
         dbgs() << Plan << '\n';
       }
-      if (VerifyEachVPlan)
+      if (VerifyEachVPlan && EnableVerify)
         verifyVPlanIsValid(Plan);
     };
 
@@ -68,6 +68,8 @@ struct VPlanTransforms {
   }
 #define RUN_VPLAN_PASS(PASS, ...)                                              \
   llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
+#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...)                                    \
+  llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
 
   /// Create a base VPlan0, serving as the common starting point for all later
   /// candidates. It consists of an initial plain CFG loop with loop blocks from
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
index 34f576ed31da5..9b59afbf8a6c6 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
@@ -2,7 +2,7 @@
 
 ; Verify that `-vplan-print-after-all` option works.
 
-; RUN: opt -passes=loop-vectorize -disable-output -vplan-print-after-all -force-vector-width=4 < %s 2>&1 | FileCheck %s --implicit-check-not "VPlan after"
+; RUN: opt -passes=loop-vectorize -disable-output -vplan-print-after-all -force-vector-width=4 -vplan-verify-each < %s 2>&1 | FileCheck %s --implicit-check-not "VPlan after"
 
 ; CHECK: VPlan after printAfterInitialConstruction
 ; CHECK: VPlan after VPlanTransforms::clearReductionWrapFlags

>From 047bc7096288046cd184104627061c5c8d20b24c Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Tue, 27 Jan 2026 14:29:58 -0800
Subject: [PATCH 3/5] Update TODO comment about "-debug"

---
 llvm/lib/Transforms/Vectorize/VPlan.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 06014ce8afa59..3055d5aac74ae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -936,10 +936,9 @@ void VPlan::execute(VPTransformState *State) {
   LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
                     << ", UF=" << getUF() << '\n');
   setName("Final VPlan");
-  // TODO: -debug-only=loop-vectorize should probably just imply
-  // `-vplan-print-after-all`, but the latter is much more verbose than the
-  // former, so doing that would require updating existing tests and that hasn't
-  // been done (yet). For now, just do both:
+  // TODO: RUN_VPLAN_PASS/VPlanTransforms::runPass should automatically dump
+  // VPlans after some specific stages when "-debug" is specified, but that
+  // hasn't been implemented yet. For now, just do both:
   LLVM_DEBUG(dump());
   RUN_VPLAN_PASS(printFinalVPlan, *this);
 

>From 2146c90f781de6a428c30486bb9315dbd1ba0aa9 Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Tue, 27 Jan 2026 14:31:19 -0800
Subject: [PATCH 4/5] Don't use RUN_VPLAN_PASS in llvm/unittests/**

---
 .../Transforms/Vectorize/VPlanUncountableExitTest.cpp         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
index f895a04bff786..c701382c54e6b 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
@@ -48,7 +48,7 @@ TEST_F(VPUncountableExitTest, FindUncountableExitRecipes) {
   BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
   auto Plan = buildVPlan(LoopHeader, /*HasUncountableExit=*/true);
   VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(*Plan, *TLI);
-  RUN_VPLAN_PASS(VPlanTransforms::optimize, *Plan);
+  VPlanTransforms::optimize(*Plan);
 
   SmallVector<VPRecipeBase *> Recipes;
   SmallVector<VPRecipeBase *> GEPs;
@@ -84,7 +84,7 @@ TEST_F(VPUncountableExitTest, NoUncountableExit) {
   BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
   auto Plan = buildVPlan(LoopHeader);
   VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(*Plan, *TLI);
-  RUN_VPLAN_PASS(VPlanTransforms::optimize, *Plan);
+  VPlanTransforms::optimize(*Plan);
 
   SmallVector<VPRecipeBase *> Recipes;
   SmallVector<VPRecipeBase *> GEPs;

>From 7cc41355a06e69acef43b1ddb963bff86eeaea6a Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Tue, 27 Jan 2026 14:44:37 -0800
Subject: [PATCH 5/5] Update test

---
 .../LoopVectorize/vplan-print-after-all.ll    | 39 +++++++------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
index 9b59afbf8a6c6..96e15740cd413 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-print-after-all.ll
@@ -1,9 +1,9 @@
+; RUN: opt -passes=loop-vectorize -disable-output -vplan-print-after-all -force-vector-width=4 -vplan-verify-each < %s 2>&1 | FileCheck %s --implicit-check-not "VPlan after"
+; RUN: opt -passes=loop-vectorize -disable-output -vplan-print-after-all -force-vector-width=4 -vplan-verify-each < %s 2>&1 | FileCheck %s --check-prefix CHECK-DUMP
 ; REQUIRES: asserts
 
 ; Verify that `-vplan-print-after-all` option works.
 
-; RUN: opt -passes=loop-vectorize -disable-output -vplan-print-after-all -force-vector-width=4 -vplan-verify-each < %s 2>&1 | FileCheck %s --implicit-check-not "VPlan after"
-
 ; CHECK: VPlan after printAfterInitialConstruction
 ; CHECK: VPlan after VPlanTransforms::clearReductionWrapFlags
 ; CHECK: VPlan after VPlanTransforms::handleMultiUseReductions
@@ -37,29 +37,20 @@
 ; CHECK: VPlan after VPlanTransforms::materializeBroadcasts
 ; CHECK: VPlan after VPlanTransforms::replicateByVF
 ; CHECK: VPlan after printFinalVPlan
-; Also verify that VPlan is actually printed:
-; CHECK-NEXT: VPlan 'Final VPlan for VF={4},UF={1}' {
-; CHECK-NEXT: Live-in ir<%smax> = original trip-count
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT:   IR   %smax = call i64 @llvm.smax.i64(i64 %n, i64 1)
-; CHECK-NEXT:   EMIT vp<%min.iters.check> = icmp ult ir<%smax>, ir<4>
-; CHECK-NEXT:   EMIT branch-on-cond vp<%min.iters.check>
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.ph:
-; CHECK-NEXT:   EMIT vp<%n.mod.vf> = urem ir<%smax>, ir<4>
-; CHECK-NEXT:   EMIT vp<%n.vec> = sub ir<%smax>, vp<%n.mod.vf>
-; CHECK-NEXT:   EMIT vp<%3> = step-vector i64
-; CHECK-NEXT:   EMIT vp<%4> = broadcast ir<4>
-; CHECK-NEXT: Successor(s): vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT:   EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT:   WIDEN-PHI ir<%iv> = phi [ vp<%3>, vector.ph ], [ vp<%vec.ind.next>, vector.body ]
-; CHECK-NEXT:   CLONE ir<%gep> = getelementptr ir<%ptr>, vp<%index>
 
-define void @foo(ptr dereferenceable(1024) %ptr, i64 %n) {
+; Also verify that VPlans are actually printed (we aren't interested in the
+; exact dump content, just that it's performed):
+
+; CHECK-DUMP:      VPlan after printAfterInitialConstruction
+; CHECK-DUMP-NEXT: VPlan ' for UF>=1' {
+;
+; CHECK-DUMP:      VPlan after VPlanTransforms::optimize
+; CHECK-DUMP-NEXT: VPlan 'Initial VPlan for VF={4},UF>=1' {
+;
+; CHECK-DUMP:      VPlan after printFinalVPlan
+; CHECK-DUMP-NEXT: VPlan 'Final VPlan for VF={4},UF={1}' {
+
+define void @foo(ptr %ptr, i64 %n) {
 entry:
   br label %header
 



More information about the llvm-commits mailing list