[llvm] [VPlan] Split out optimizeEVLMasks. NFC (PR #174925)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 10 02:48:02 PST 2026


https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/174925

>From 68eb4fc80e82cfd6fd970bf5077013db078a35c8 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 8 Jan 2026 16:27:40 +0800
Subject: [PATCH 1/6] [VPlan] Split out optimizeEVLMasks. NFC

Addresses part of #153144 and splits off part of #166164

There are two parts to the EVL transform:

1) Convert the loop so the number of elements processed each iteration is EVL, not VF. The IV and header mask are replaced with EVL-based variants.
2) Optimize users of the EVL-based header mask into VP-intrinsic-based recipes.

Part 1) changes the semantics of the vector loop region, whereas part 2) needs to preserve them. This patch splits 2) out so that we don't mix the two up, and so that 1) can be moved earlier in the pipeline in a future PR.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  6 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 82 +++++++++----------
 .../Transforms/Vectorize/VPlanTransforms.h    |  8 ++
 3 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 31c44fc46c973..903fb7b25ad6a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8328,10 +8328,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                *Plan, CM.getMinimalBitwidths());
       VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
-      // TODO: try to put it close to addActiveLaneMask().
-      if (CM.foldTailWithEVL())
+      // TODO: try to put addExplicitVectorLength close to addActiveLaneMask
+      if (CM.foldTailWithEVL()) {
         VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
                                  *Plan, CM.getMaxSafeElements());
+        VPlanTransforms::runPass(VPlanTransforms::optimizeEVLMasks, *Plan);
+      }
       assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
       VPlans.push_back(std::move(Plan));
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b1880517a4199..e1dba4cf4a064 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2979,8 +2979,42 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
   return nullptr;
 }
 
-/// Replace recipes with their EVL variants.
-static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
+/// Optimize away any EVL-based header masks to VP intrinsic based recipes.
+/// The transforms here need to preserve the original semantics.
+void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
+  // Find the EVL-based header mask if it exists: icmp ult step-vector, EVL
+  VPInstruction *HeaderMask = nullptr;
+  for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
+    if (match(&R, m_ICmp(m_VPInstruction<VPInstruction::StepVector>(),
+                         m_EVL(m_VPValue())))) {
+      HeaderMask = cast<VPInstruction>(&R);
+      break;
+    }
+  }
+  if (!HeaderMask)
+    return;
+
+  VPTypeAnalysis TypeInfo(Plan);
+  VPValue *EVL = HeaderMask->getOperand(1);
+
+  for (VPUser *U : collectUsersRecursively(HeaderMask)) {
+    VPRecipeBase *R = cast<VPRecipeBase>(U);
+    if (auto *NewR = optimizeMaskToEVL(HeaderMask, *R, TypeInfo, *EVL)) {
+      NewR->insertBefore(R);
+      for (auto [Old, New] :
+           zip_equal(R->definedValues(), NewR->definedValues()))
+        Old->replaceAllUsesWith(New);
+      // Erase dead stores, the rest will be removed by removeDeadRecipes.
+      if (R->getNumDefinedValues() == 0)
+        R->eraseFromParent();
+    }
+  }
+  removeDeadRecipes(Plan);
+}
+
+/// After replacing the IV with a EVL-based IV, fixup recipes that use VF to use
+/// the EVL instead to avoid incorrect updates on the penultimate iteration.
+static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(Plan);
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -3008,10 +3042,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
     return isa<VPWidenPointerInductionRecipe>(U);
   });
 
-  // Defer erasing recipes till the end so that we don't invalidate the
-  // VPTypeAnalysis cache.
-  SmallVector<VPRecipeBase *> ToErase;
-
   // Create a scalar phi to track the previous EVL if fixed-order recurrence is
   // contained.
   bool ContainsFORs =
@@ -3046,7 +3076,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
             R.getDebugLoc());
         VPSplice->insertBefore(&R);
         R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
-        ToErase.push_back(&R);
       }
     }
   }
@@ -3067,43 +3096,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
       CmpInst::ICMP_ULT,
       Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType), &EVL);
   HeaderMask->replaceAllUsesWith(EVLMask);
-  ToErase.push_back(HeaderMask->getDefiningRecipe());
-
-  // Try to optimize header mask recipes away to their EVL variants.
-  // TODO: Split optimizeMaskToEVL out and move into
-  // VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
-  // tryToBuildVPlanWithVPRecipes beforehand.
-  for (VPUser *U : collectUsersRecursively(EVLMask)) {
-    auto *CurRecipe = cast<VPRecipeBase>(U);
-    VPRecipeBase *EVLRecipe =
-        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
-    if (!EVLRecipe)
-      continue;
-
-    unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
-    assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
-           "New recipe must define the same number of values as the "
-           "original.");
-    EVLRecipe->insertBefore(CurRecipe);
-    if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
-            EVLRecipe)) {
-      for (unsigned I = 0; I < NumDefVal; ++I) {
-        VPValue *CurVPV = CurRecipe->getVPValue(I);
-        CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(I));
-      }
-    }
-    ToErase.push_back(CurRecipe);
-  }
-  // Remove dead EVL mask.
-  if (EVLMask->getNumUsers() == 0)
-    ToErase.push_back(EVLMask->getDefiningRecipe());
-
-  for (VPRecipeBase *R : reverse(ToErase)) {
-    SmallVector<VPValue *> PossiblyDead(R->operands());
-    R->eraseFromParent();
-    for (VPValue *Op : PossiblyDead)
-      recursivelyDeleteDeadRecipes(Op);
-  }
 }
 
 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -3201,7 +3193,7 @@ void VPlanTransforms::addExplicitVectorLength(
       DebugLoc::getCompilerGenerated(), "avl.next");
   AVLPhi->addOperand(NextAVL);
 
-  transformRecipestoEVLRecipes(Plan, *VPEVL);
+  fixupVFUsersForEVL(Plan, *VPEVL);
 
   // Replace all uses of VPCanonicalIVPHIRecipe by
   // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index f4ecee4e7f04f..35af0a76019dc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -266,6 +266,14 @@ struct VPlanTransforms {
   addExplicitVectorLength(VPlan &Plan,
                           const std::optional<unsigned> &MaxEVLSafeElements);
 
+  /// Optimize recipes which use an EVL based header mask to a VP intrinsic:
+  ///
+  /// %mask = icmp step-vector, EVL
+  /// %load = load %ptr, %mask
+  /// -->
+  /// %load = vp.load %ptr, EVL
+  static void optimizeEVLMasks(VPlan &Plan);
+
   // For each Interleave Group in \p InterleaveGroups replace the Recipes
   // widening its memory instructions with a single VPInterleaveRecipe at its
   // insertion point.

>From abbb9390d419fc0089cf282776cd8f38da40e817 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 8 Jan 2026 16:34:14 +0800
Subject: [PATCH 2/6] Remove dead recipes in addExplicitVectorLength too

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e1dba4cf4a064..b4ba6b2ee2644 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3194,6 +3194,7 @@ void VPlanTransforms::addExplicitVectorLength(
   AVLPhi->addOperand(NextAVL);
 
   fixupVFUsersForEVL(Plan, *VPEVL);
+  removeDeadRecipes(Plan);
 
   // Replace all uses of VPCanonicalIVPHIRecipe by
   // VPEVLBasedIVPHIRecipe except for the canonical IV increment.

>From e5dbe6694c165b5a4d1a891ca4e66bb87808592b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 10 Jan 2026 15:30:35 +0800
Subject: [PATCH 3/6] Use m_SpecificICmp, use m_StepVector, pattern match EVL

---
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h |  5 +++++
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10 +++++-----
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h   |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 70b174509fc47..0626bb1077f0e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -633,6 +633,11 @@ struct SpecificCmp_match {
       : Predicate(Pred), Op0(LHS), Op1(RHS) {}
 
   bool match(const VPValue *V) const {
+    auto *DefR = V->getDefiningRecipe();
+    return DefR && match(DefR);
+  }
+
+  bool match(const VPRecipeBase *V) const {
     CmpPredicate CurrentPred;
     return Cmp_match<Op0_t, Op1_t, Opcodes...>(CurrentPred, Op0, Op1)
                .match(V) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b4ba6b2ee2644..9747820ee883f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2983,11 +2983,12 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
 /// The transforms here need to preserve the original semantics.
 void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
   // Find the EVL-based header mask if it exists: icmp ult step-vector, EVL
-  VPInstruction *HeaderMask = nullptr;
+  VPValue *HeaderMask = nullptr, *EVL = nullptr;
   for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
-    if (match(&R, m_ICmp(m_VPInstruction<VPInstruction::StepVector>(),
-                         m_EVL(m_VPValue())))) {
-      HeaderMask = cast<VPInstruction>(&R);
+    if (match(&R, m_SpecificICmp(CmpInst::ICMP_ULT, m_StepVector(),
+                                 m_VPValue(EVL))) &&
+        match(EVL, m_EVL(m_VPValue()))) {
+      HeaderMask = R.getVPSingleValue();
       break;
     }
   }
@@ -2995,7 +2996,6 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
     return;
 
   VPTypeAnalysis TypeInfo(Plan);
-  VPValue *EVL = HeaderMask->getOperand(1);
 
   for (VPUser *U : collectUsersRecursively(HeaderMask)) {
     VPRecipeBase *R = cast<VPRecipeBase>(U);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 35af0a76019dc..8f3d3b40c1c77 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -268,7 +268,7 @@ struct VPlanTransforms {
 
   /// Optimize recipes which use an EVL based header mask to a VP intrinsic:
   ///
-  /// %mask = icmp step-vector, EVL
+  /// %mask = icmp ult step-vector, EVL
   /// %load = load %ptr, %mask
   /// -->
   /// %load = vp.load %ptr, EVL

>From 5a2b815c74856bb412de2bc69803b4124afdb855 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 10 Jan 2026 15:59:01 +0800
Subject: [PATCH 4/6] Erase recipes at the end to avoid invalidating TypeInfo

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9747820ee883f..4ee5dded29a83 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2608,7 +2608,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
         ProcessedIter->second = NewOp;
         R.setOperand(Idx, NewOp);
       }
-
     }
   }
 }
@@ -2996,6 +2995,7 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
     return;
 
   VPTypeAnalysis TypeInfo(Plan);
+  SmallVector<VPRecipeBase *> OldRecipes;
 
   for (VPUser *U : collectUsersRecursively(HeaderMask)) {
     VPRecipeBase *R = cast<VPRecipeBase>(U);
@@ -3004,11 +3004,13 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
       for (auto [Old, New] :
            zip_equal(R->definedValues(), NewR->definedValues()))
         Old->replaceAllUsesWith(New);
-      // Erase dead stores, the rest will be removed by removeDeadRecipes.
-      if (R->getNumDefinedValues() == 0)
-        R->eraseFromParent();
+      OldRecipes.push_back(R);
     }
   }
+  // Erase recipes at the end so we don't invalidate TypeInfo.
+  for (VPRecipeBase *OldR : OldRecipes)
+    OldR->eraseFromParent();
+  // Clean up any recipes now made dead.
   removeDeadRecipes(Plan);
 }
 

>From acf669c9b0eee7bb218121dcccd47c6088631b43 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 10 Jan 2026 16:12:03 +0800
Subject: [PATCH 5/6] Fix stray change

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4ee5dded29a83..62016bdffc502 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2608,6 +2608,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
         ProcessedIter->second = NewOp;
         R.setOperand(Idx, NewOp);
       }
+
     }
   }
 }

>From 3a6438e3878daaa72bdab48169319cfc0b3a784d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 10 Jan 2026 18:47:43 +0800
Subject: [PATCH 6/6] Address comments

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 62016bdffc502..1ea057100e03b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2997,7 +2997,6 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
 
   VPTypeAnalysis TypeInfo(Plan);
   SmallVector<VPRecipeBase *> OldRecipes;
-
   for (VPUser *U : collectUsersRecursively(HeaderMask)) {
     VPRecipeBase *R = cast<VPRecipeBase>(U);
     if (auto *NewR = optimizeMaskToEVL(HeaderMask, *R, TypeInfo, *EVL)) {
@@ -3011,7 +3010,6 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
   // Erase recipes at the end so we don't invalidate TypeInfo.
   for (VPRecipeBase *OldR : OldRecipes)
     OldR->eraseFromParent();
-  // Clean up any recipes now made dead.
   removeDeadRecipes(Plan);
 }
 



More information about the llvm-commits mailing list