[llvm] [VPlan] Run initial recipe simplification on VPlan0. (PR #176828)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 03:33:08 PST 2026


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/176828

>From f5b3906419faad3527d8ed50c255fc4559b0dd5b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 Jan 2026 21:57:13 +0000
Subject: [PATCH 1/4] [VPlan] Run initial recipe simplification on VPlan0.

In some cases, LV gets simplifyable IR as input. Directly apply
simplifications on the initial VPlan0 to avoid vectorization in cases
where the loop body can be folded away.

Using the end-to-end pipeline, this is relatively rare, but when
reducing test cases, the reduction often ends up with cases with trivial
folds. Rejecting those will result in more robust & realistic test
cases.

As follow-up, I also plan to add initial dead recipe removal.

Depends on https://github.com/llvm/llvm-project/pull/176795 (included in
PR).
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   6 +-
 .../Transforms/Vectorize/VPlanAnalysis.cpp    |  11 +
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  49 +++-
 .../X86/cost-conditional-branches.ll          | 260 +-----------------
 .../X86/pr141968-instsimplifyfolder.ll        | 101 +------
 .../epilog-vectorization-reductions.ll        |   2 -
 ...r154045-dont-fold-extractelement-livein.ll |  13 -
 .../LoopVectorize/select-neg-cond.ll          |   4 +-
 8 files changed, 74 insertions(+), 372 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index db2c84d78fb15..d508d290a12d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8122,6 +8122,8 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
       CM.getInLoopReductions(), Hints.allowReordering());
 
+  VPlanTransforms::simplifyRecipes(*VPlan0);
+
   auto MaxVFTimes2 = MaxVF * 2;
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
@@ -8464,7 +8466,9 @@ void LoopVectorizationPlanner::addReductionResultComputation(
   for (VPRecipeBase &R :
        Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
     VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
-    if (!PhiR)
+    // TODO: Remove check for constant incoming value once removeDeadRecipes is
+    // used on VPlan0.
+    if (!PhiR || isa<VPIRValue>(PhiR->getOperand(1)))
       continue;
 
     const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8fbe7d93e6f45..1134a4116f6d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -129,7 +129,18 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   case VPInstruction::BranchOnCond:
   case VPInstruction::BranchOnTwoConds:
   case VPInstruction::BranchOnCount:
+  case Instruction::Store:
     return Type::getVoidTy(Ctx);
+  case Instruction::Load:
+    return cast<LoadInst>(R->getUnderlyingValue())->getType();
+  case Instruction::Alloca:
+    return cast<AllocaInst>(R->getUnderlyingValue())->getType();
+  case Instruction::Call:
+    return cast<CallInst>(R->getUnderlyingValue())->getType();
+  case Instruction::GetElementPtr:
+    return inferScalarType(R->getOperand(0));
+  case Instruction::ExtractValue:
+    return cast<ExtractValueInst>(R->getUnderlyingValue())->getType();
   default:
     break;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e19c944fe186..f33fbcc7c9870 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1215,6 +1215,13 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
   }
 
   VPBuilder Builder(Def);
+
+  // Avoid replacing VPInstructions with underlying value with new
+  // VPInstructions. VPInstructions with underlying values should get
+  // widened/replicated later.
+  bool CanCreateNewRecipe =
+      !isa<VPInstruction>(Def) || !Def->getUnderlyingValue();
+
   VPValue *A;
   if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
     Type *TruncTy = TypeInfo.inferScalarType(Def);
@@ -1222,8 +1229,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
     if (TruncTy == ATy) {
       Def->replaceAllUsesWith(A);
     } else {
-      // Don't replace a scalarizing recipe with a widened cast.
-      if (isa<VPReplicateRecipe>(Def))
+      // Don't replace a non-widened cast recipe with a widened cast.
+      if (!isa<VPWidenCastRecipe>(Def))
         return;
       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
 
@@ -1291,7 +1298,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
     return Def->replaceAllUsesWith(Def->getOperand(1));
 
   // (x && y) || (x && z) -> x && (y || z)
-  if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+  if (CanCreateNewRecipe &&
+      match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
                               m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) &&
       // Simplify only if one of the operands has one use to avoid creating an
       // extra recipe.
@@ -1309,7 +1317,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
 
   // select c, false, true -> not c
   VPValue *C;
-  if (match(Def, m_Select(m_VPValue(C), m_False(), m_True())))
+  if (CanCreateNewRecipe &&
+      match(Def, m_Select(m_VPValue(C), m_False(), m_True())))
     return Def->replaceAllUsesWith(Builder.createNot(C));
 
   // select !c, x, y -> select c, y, x
@@ -1323,7 +1332,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
   // Reassociate (x && y) && z -> x && (y && z) if x has multiple users. With
   // tail folding it is likely that x is a header mask and can be simplified
   // further.
-  if (match(Def, m_LogicalAnd(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+  if (CanCreateNewRecipe &&
+      match(Def, m_LogicalAnd(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
                               m_VPValue(Z))) &&
       X->hasMoreThanOneUniqueUser())
     return Def->replaceAllUsesWith(
@@ -1340,7 +1350,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
         Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
 
   const APInt *APC;
-  if (match(Def, m_c_Mul(m_VPValue(A), m_APInt(APC))) && APC->isPowerOf2())
+  if (CanCreateNewRecipe && match(Def, m_c_Mul(m_VPValue(A), m_APInt(APC))) &&
+      APC->isPowerOf2())
     return Def->replaceAllUsesWith(Builder.createNaryOp(
         Instruction::Shl,
         {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
@@ -1350,8 +1361,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
   // not allowed in them.
   const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
   bool IsInReplicateRegion = ParentRegion && ParentRegion->isReplicator();
-  if (!IsInReplicateRegion && match(Def, m_UDiv(m_VPValue(A), m_APInt(APC))) &&
-      APC->isPowerOf2())
+  if (CanCreateNewRecipe && !IsInReplicateRegion &&
+      match(Def, m_UDiv(m_VPValue(A), m_APInt(APC))) && APC->isPowerOf2())
     return Def->replaceAllUsesWith(Builder.createNaryOp(
         Instruction::LShr,
         {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
@@ -1422,7 +1433,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
   // Fold (fcmp uno %X, %X) or (fcmp uno %Y, %Y) -> fcmp uno %X, %Y
   // This is useful for fmax/fmin without fast-math flags, where we need to
   // check if any operand is NaN.
-  if (match(Def, m_BinaryOr(m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(X),
+  if (CanCreateNewRecipe &&
+      match(Def, m_BinaryOr(m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(X),
                                           m_Deferred(X)),
                             m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(Y),
                                           m_Deferred(Y))))) {
@@ -2173,6 +2185,11 @@ static bool
 sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
                                  VPRecipeBase *Previous,
                                  VPDominatorTree &VPDT) {
+  // If Previous is a live-in (no defining recipe), it naturally dominates all
+  // recipes in the loop, so no sinking is needed.
+  if (!Previous)
+    return true;
+
   // Collect recipes that need sinking.
   SmallVector<VPRecipeBase *> WorkList;
   SmallPtrSet<VPRecipeBase *, 8> Seen;
@@ -2349,8 +2366,9 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
 
     // Introduce a recipe to combine the incoming and previous values of a
     // fixed-order recurrence.
-    VPBasicBlock *InsertBlock = Previous->getParent();
-    if (isa<VPHeaderPHIRecipe>(Previous))
+    VPBasicBlock *InsertBlock =
+        Previous ? Previous->getParent() : FOR->getParent();
+    if (!Previous || isa<VPHeaderPHIRecipe>(Previous))
       LoopBuilder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
     else
       LoopBuilder.setInsertPoint(InsertBlock,
@@ -3414,8 +3432,13 @@ void VPlanTransforms::replaceSymbolicStrides(
   // evolution.
   auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
     auto *R = cast<VPRecipeBase>(&U);
-    return R->getRegion() ||
-           R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
+    if (R->getRegion())
+      return true;
+    // For VPlan0 (no VectorLoopRegion yet), allow replacement in the loop body.
+    auto *VectorLoopRegion = Plan.getVectorLoopRegion();
+    if (!VectorLoopRegion)
+      return !isa<VPIRBasicBlock>(R->getParent());
+    return R->getParent() == VectorLoopRegion->getSinglePredecessor();
   };
   ValueToSCEVMapTy RewriteMap;
   for (const SCEV *Stride : StridesMap.values()) {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 2f78e72e106d6..40eee658f3421 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -686,253 +686,13 @@ exit:
 ; Test for https://github.com/llvm/llvm-project/issues/129236.
 define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
 ; CHECK-LABEL: @cost_ashr_with_op_known_invariant_via_scev(
-; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i16 0, 0
 ; CHECK-NEXT:    [[CONV_I:%.*]] = sext i16 0 to i32
 ; CHECK-NEXT:    [[CONV5_I:%.*]] = sext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
-; CHECK:       vector.main.loop.iter.check:
-; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i1> poison, i1 [[CMP_I]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i1> [[BROADCAST_SPLATINSERT]], <32 x i1> poison, <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP60:%.*]] = xor <32 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT:    br label [[LOOP_HEADER1:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE62:%.*]] ]
-; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <32 x i1> [[TMP60]], i32 0
-; CHECK-NEXT:    br i1 [[TMP61]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
-; CHECK:       pred.urem.if:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE]]
-; CHECK:       pred.urem.continue:
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <32 x i1> [[TMP60]], i32 1
-; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_UREM_IF1:%.*]], label [[PRED_UREM_CONTINUE2:%.*]]
-; CHECK:       pred.urem.if1:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE2]]
-; CHECK:       pred.urem.continue2:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x i1> [[TMP60]], i32 2
-; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4:%.*]]
-; CHECK:       pred.urem.if3:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE4]]
-; CHECK:       pred.urem.continue4:
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <32 x i1> [[TMP60]], i32 3
-; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_UREM_IF5:%.*]], label [[PRED_UREM_CONTINUE6:%.*]]
-; CHECK:       pred.urem.if5:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE6]]
-; CHECK:       pred.urem.continue6:
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <32 x i1> [[TMP60]], i32 4
-; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]]
-; CHECK:       pred.urem.if7:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE8]]
-; CHECK:       pred.urem.continue8:
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <32 x i1> [[TMP60]], i32 5
-; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]]
-; CHECK:       pred.urem.if9:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE10]]
-; CHECK:       pred.urem.continue10:
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <32 x i1> [[TMP60]], i32 6
-; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]]
-; CHECK:       pred.urem.if11:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE12]]
-; CHECK:       pred.urem.continue12:
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <32 x i1> [[TMP60]], i32 7
-; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]]
-; CHECK:       pred.urem.if13:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE14]]
-; CHECK:       pred.urem.continue14:
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <32 x i1> [[TMP60]], i32 8
-; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16:%.*]]
-; CHECK:       pred.urem.if15:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE16]]
-; CHECK:       pred.urem.continue16:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <32 x i1> [[TMP60]], i32 9
-; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_UREM_IF17:%.*]], label [[PRED_UREM_CONTINUE18:%.*]]
-; CHECK:       pred.urem.if17:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE18]]
-; CHECK:       pred.urem.continue18:
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <32 x i1> [[TMP60]], i32 10
-; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_UREM_IF19:%.*]], label [[PRED_UREM_CONTINUE20:%.*]]
-; CHECK:       pred.urem.if19:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE20]]
-; CHECK:       pred.urem.continue20:
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <32 x i1> [[TMP60]], i32 11
-; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_UREM_IF21:%.*]], label [[PRED_UREM_CONTINUE22:%.*]]
-; CHECK:       pred.urem.if21:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE22]]
-; CHECK:       pred.urem.continue22:
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <32 x i1> [[TMP60]], i32 12
-; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_UREM_IF23:%.*]], label [[PRED_UREM_CONTINUE24:%.*]]
-; CHECK:       pred.urem.if23:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE24]]
-; CHECK:       pred.urem.continue24:
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <32 x i1> [[TMP60]], i32 13
-; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_UREM_IF25:%.*]], label [[PRED_UREM_CONTINUE26:%.*]]
-; CHECK:       pred.urem.if25:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE26]]
-; CHECK:       pred.urem.continue26:
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <32 x i1> [[TMP60]], i32 14
-; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_UREM_IF27:%.*]], label [[PRED_UREM_CONTINUE28:%.*]]
-; CHECK:       pred.urem.if27:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE28]]
-; CHECK:       pred.urem.continue28:
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <32 x i1> [[TMP60]], i32 15
-; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_UREM_IF29:%.*]], label [[PRED_UREM_CONTINUE30:%.*]]
-; CHECK:       pred.urem.if29:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE30]]
-; CHECK:       pred.urem.continue30:
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <32 x i1> [[TMP60]], i32 16
-; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_UREM_IF31:%.*]], label [[PRED_UREM_CONTINUE32:%.*]]
-; CHECK:       pred.urem.if31:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE32]]
-; CHECK:       pred.urem.continue32:
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <32 x i1> [[TMP60]], i32 17
-; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_UREM_IF33:%.*]], label [[PRED_UREM_CONTINUE34:%.*]]
-; CHECK:       pred.urem.if33:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE34]]
-; CHECK:       pred.urem.continue34:
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <32 x i1> [[TMP60]], i32 18
-; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_UREM_IF35:%.*]], label [[PRED_UREM_CONTINUE36:%.*]]
-; CHECK:       pred.urem.if35:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE36]]
-; CHECK:       pred.urem.continue36:
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <32 x i1> [[TMP60]], i32 19
-; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_UREM_IF37:%.*]], label [[PRED_UREM_CONTINUE38:%.*]]
-; CHECK:       pred.urem.if37:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE38]]
-; CHECK:       pred.urem.continue38:
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <32 x i1> [[TMP60]], i32 20
-; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_UREM_IF39:%.*]], label [[PRED_UREM_CONTINUE40:%.*]]
-; CHECK:       pred.urem.if39:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE40]]
-; CHECK:       pred.urem.continue40:
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <32 x i1> [[TMP60]], i32 21
-; CHECK-NEXT:    br i1 [[TMP22]], label [[PRED_UREM_IF41:%.*]], label [[PRED_UREM_CONTINUE42:%.*]]
-; CHECK:       pred.urem.if41:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE42]]
-; CHECK:       pred.urem.continue42:
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <32 x i1> [[TMP60]], i32 22
-; CHECK-NEXT:    br i1 [[TMP23]], label [[PRED_UREM_IF43:%.*]], label [[PRED_UREM_CONTINUE44:%.*]]
-; CHECK:       pred.urem.if43:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE44]]
-; CHECK:       pred.urem.continue44:
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <32 x i1> [[TMP60]], i32 23
-; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_UREM_IF45:%.*]], label [[PRED_UREM_CONTINUE46:%.*]]
-; CHECK:       pred.urem.if45:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE46]]
-; CHECK:       pred.urem.continue46:
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <32 x i1> [[TMP60]], i32 24
-; CHECK-NEXT:    br i1 [[TMP25]], label [[PRED_UREM_IF47:%.*]], label [[PRED_UREM_CONTINUE48:%.*]]
-; CHECK:       pred.urem.if47:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE48]]
-; CHECK:       pred.urem.continue48:
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <32 x i1> [[TMP60]], i32 25
-; CHECK-NEXT:    br i1 [[TMP26]], label [[PRED_UREM_IF49:%.*]], label [[PRED_UREM_CONTINUE50:%.*]]
-; CHECK:       pred.urem.if49:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE50]]
-; CHECK:       pred.urem.continue50:
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <32 x i1> [[TMP60]], i32 26
-; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_UREM_IF51:%.*]], label [[PRED_UREM_CONTINUE52:%.*]]
-; CHECK:       pred.urem.if51:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE52]]
-; CHECK:       pred.urem.continue52:
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <32 x i1> [[TMP60]], i32 27
-; CHECK-NEXT:    br i1 [[TMP28]], label [[PRED_UREM_IF53:%.*]], label [[PRED_UREM_CONTINUE54:%.*]]
-; CHECK:       pred.urem.if53:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE54]]
-; CHECK:       pred.urem.continue54:
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <32 x i1> [[TMP60]], i32 28
-; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_UREM_IF55:%.*]], label [[PRED_UREM_CONTINUE56:%.*]]
-; CHECK:       pred.urem.if55:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE56]]
-; CHECK:       pred.urem.continue56:
-; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <32 x i1> [[TMP60]], i32 29
-; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_UREM_IF57:%.*]], label [[PRED_UREM_CONTINUE58:%.*]]
-; CHECK:       pred.urem.if57:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE58]]
-; CHECK:       pred.urem.continue58:
-; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <32 x i1> [[TMP60]], i32 30
-; CHECK-NEXT:    br i1 [[TMP31]], label [[PRED_UREM_IF59:%.*]], label [[PRED_UREM_CONTINUE60:%.*]]
-; CHECK:       pred.urem.if59:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE60]]
-; CHECK:       pred.urem.continue60:
-; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <32 x i1> [[TMP60]], i32 31
-; CHECK-NEXT:    br i1 [[TMP32]], label [[PRED_UREM_IF61:%.*]], label [[PRED_UREM_CONTINUE62]]
-; CHECK:       pred.urem.if61:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE62]]
-; CHECK:       pred.urem.continue62:
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[CMP_I]], <32 x i32> zeroinitializer, <32 x i32> poison
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <32 x i32> [[PREDPHI]], i32 0
-; CHECK-NEXT:    [[TMP34:%.*]] = ashr i32 [[CONV5_I]], [[TMP33]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT63:%.*]] = insertelement <32 x i32> poison, i32 [[TMP34]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT64:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT63]], <32 x i32> poison, <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
-; CHECK-NEXT:    [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
-; CHECK-NEXT:    br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP10:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP36:%.*]] = select <32 x i1> [[TMP60]], <32 x i1> poison, <32 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP37:%.*]] = or <32 x i1> [[TMP36]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP38:%.*]] = icmp eq <32 x i32> [[BROADCAST_SPLAT64]], zeroinitializer
-; CHECK-NEXT:    [[TMP39:%.*]] = shl <32 x i32> [[PREDPHI]], splat (i32 24)
-; CHECK-NEXT:    [[TMP40:%.*]] = ashr exact <32 x i32> [[TMP39]], splat (i32 24)
-; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <32 x i1> [[TMP38]], i32 0
-; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], <32 x i32> [[TMP40]], <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI65:%.*]] = select <32 x i1> [[TMP37]], <32 x i32> [[TMP42]], <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <32 x i32> [[PREDPHI65]], i32 31
-; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
-; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
-; CHECK:       vec.epilog.ph:
-; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT66:%.*]] = insertelement <4 x i1> poison, i1 [[CMP_I]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT67:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT66]], <4 x i1> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP44:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT67]], splat (i1 true)
-; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
-; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX68:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT78:%.*]], [[PRED_UREM_CONTINUE76:%.*]] ]
-; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <4 x i1> [[TMP44]], i32 0
-; CHECK-NEXT:    br i1 [[TMP45]], label [[PRED_UREM_IF69:%.*]], label [[PRED_UREM_CONTINUE70:%.*]]
-; CHECK:       pred.urem.if69:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE70]]
-; CHECK:       pred.urem.continue70:
-; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <4 x i1> [[TMP44]], i32 1
-; CHECK-NEXT:    br i1 [[TMP46]], label [[PRED_UREM_IF71:%.*]], label [[PRED_UREM_CONTINUE72:%.*]]
-; CHECK:       pred.urem.if71:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE72]]
-; CHECK:       pred.urem.continue72:
-; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <4 x i1> [[TMP44]], i32 2
-; CHECK-NEXT:    br i1 [[TMP47]], label [[PRED_UREM_IF73:%.*]], label [[PRED_UREM_CONTINUE74:%.*]]
-; CHECK:       pred.urem.if73:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE74]]
-; CHECK:       pred.urem.continue74:
-; CHECK-NEXT:    [[TMP48:%.*]] = extractelement <4 x i1> [[TMP44]], i32 3
-; CHECK-NEXT:    br i1 [[TMP48]], label [[PRED_UREM_IF75:%.*]], label [[PRED_UREM_CONTINUE76]]
-; CHECK:       pred.urem.if75:
-; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE76]]
-; CHECK:       pred.urem.continue76:
-; CHECK-NEXT:    [[PREDPHI77:%.*]] = select i1 [[CMP_I]], <4 x i32> zeroinitializer, <4 x i32> poison
-; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <4 x i32> [[PREDPHI77]], i32 0
-; CHECK-NEXT:    [[TMP50:%.*]] = ashr i32 [[CONV5_I]], [[TMP49]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT79:%.*]] = insertelement <4 x i32> poison, i32 [[TMP50]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT80:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT79]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[INDEX_NEXT78]] = add nuw i32 [[INDEX68]], 4
-; CHECK-NEXT:    [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT78]], 100
-; CHECK-NEXT:    br i1 [[TMP51]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[TMP52:%.*]] = select <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP53:%.*]] = or <4 x i1> [[TMP52]], [[BROADCAST_SPLAT67]]
-; CHECK-NEXT:    [[TMP54:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT80]], zeroinitializer
-; CHECK-NEXT:    [[TMP55:%.*]] = shl <4 x i32> [[PREDPHI77]], splat (i32 24)
-; CHECK-NEXT:    [[TMP56:%.*]] = ashr exact <4 x i32> [[TMP55]], splat (i32 24)
-; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <4 x i1> [[TMP54]], i32 0
-; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], <4 x i32> [[TMP56]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI81:%.*]] = select <4 x i1> [[TMP53]], <4 x i32> [[TMP58]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP59:%.*]] = extractelement <4 x i32> [[PREDPHI81]], i32 3
-; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
-; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 4, [[VEC_EPILOG_ITER_CHECK]] ], [ 100, [[ITER_CHECK:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ 100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:    br i1 [[CMP_I]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[P_1:%.*]] = phi i32 [ [[REM_I:%.*]], [[ELSE]] ], [ 0, [[LOOP_HEADER]] ]
@@ -950,9 +710,9 @@ define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
 ; CHECK-NEXT:    [[P_2:%.*]] = phi i32 [ 0, [[ELSE]] ], [ [[TMP1]], [[THEN]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], -1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ [[TMP59]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    ret i32 [[P_2_LCSSA]]
 ;
 entry:
@@ -1078,7 +838,7 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
 ; CHECK-NEXT:    store <8 x i32> [[PREDPHI]], ptr [[TMP42]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[SCALAR_PH:%.*]]
 ; CHECK:       scalar.ph:
@@ -1098,7 +858,7 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
 ; CHECK-NEXT:    store i32 [[MERGE]], ptr [[GEP_DST]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ult i64 [[IV]], 16
-; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
@@ -1435,14 +1195,14 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 {
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32)
 ; CHECK-NEXT:    [[TMP163:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP163]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP163]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP207:%.*]] = zext <32 x i32> [[TMP161]] to <32 x i64>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[C]], <32 x i64> zeroinitializer, <32 x i64> [[TMP207]]
 ; CHECK-NEXT:    [[TMP164:%.*]] = extractelement <32 x i64> [[PREDPHI]], i32 31
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]]
+; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF13:![0-9]+]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT63:%.*]] = insertelement <8 x i1> poison, i1 [[C]], i64 0
@@ -1531,7 +1291,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 {
 ; CHECK-NEXT:    [[INDEX_NEXT86]] = add nuw i32 [[INDEX67]], 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT87]] = add <8 x i32> [[VEC_IND68]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP208:%.*]] = icmp eq i32 [[INDEX_NEXT86]], 1000
-; CHECK-NEXT:    br i1 [[TMP208]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP208]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    [[TMP210:%.*]] = zext <8 x i32> [[TMP206]] to <8 x i64>
 ; CHECK-NEXT:    [[PREDPHI85:%.*]] = select i1 [[C]], <8 x i64> zeroinitializer, <8 x i64> [[TMP210]]
@@ -1552,7 +1312,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 {
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i64 [ [[ZEXT]], [[THEN]] ], [ 0, [[LOOP_HEADER]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], 1000
-; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[MERGE_LCSSA:%.*]] = phi i64 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP164]], [[MIDDLE_BLOCK]] ], [ [[TMP209]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i64 [[MERGE_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
index 2e56ef4975761..d8d08ce9e1d2e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
@@ -6,104 +6,23 @@ target triple = "x86_64"
 define void @pr141968(i1 %cond, i8 %v, ptr %p) {
 ; CHECK-LABEL: define void @pr141968(
 ; CHECK-SAME: i1 [[COND:%.*]], i8 [[V:%.*]], ptr [[P:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[ZEXT_TRUE:%.*]] = zext i1 true to i16
 ; CHECK-NEXT:    [[SEXT:%.*]] = sext i8 [[V]] to i16
-; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
-; CHECK:       [[LOOP_HEADER]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[COND]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT:    br label %[[COND_FALSE:.*]]
-; CHECK:       [[COND_FALSE]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE30:.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_SDIV_IF:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK:       [[PRED_SDIV_IF]]:
-; CHECK-NEXT:    br label %[[LOOP_LATCH]]
-; CHECK:       [[LOOP_LATCH]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP2]], label %[[EXIT:.*]], label %[[PRED_SDIV_CONTINUE2:.*]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE2]]
-; CHECK:       [[PRED_SDIV_CONTINUE2]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
-; CHECK-NEXT:    br i1 [[TMP3]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]]
-; CHECK:       [[PRED_SDIV_IF3]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE4]]
-; CHECK:       [[PRED_SDIV_CONTINUE4]]:
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
-; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6:.*]]
-; CHECK:       [[PRED_SDIV_IF5]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE6]]
-; CHECK:       [[PRED_SDIV_CONTINUE6]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[PRED_SDIV_IF7:.*]], label %[[PRED_SDIV_CONTINUE8:.*]]
-; CHECK:       [[PRED_SDIV_IF7]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE8]]
-; CHECK:       [[PRED_SDIV_CONTINUE8]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_SDIV_IF9:.*]], label %[[PRED_SDIV_CONTINUE10:.*]]
-; CHECK:       [[PRED_SDIV_IF9]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE10]]
-; CHECK:       [[PRED_SDIV_CONTINUE10]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
-; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_SDIV_IF11:.*]], label %[[PRED_SDIV_CONTINUE12:.*]]
-; CHECK:       [[PRED_SDIV_IF11]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE12]]
-; CHECK:       [[PRED_SDIV_CONTINUE12]]:
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
-; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_SDIV_IF13:.*]], label %[[PRED_SDIV_CONTINUE14:.*]]
-; CHECK:       [[PRED_SDIV_IF13]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE14]]
-; CHECK:       [[PRED_SDIV_CONTINUE14]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
-; CHECK-NEXT:    br i1 [[TMP9]], label %[[PRED_SDIV_IF15:.*]], label %[[PRED_SDIV_CONTINUE16:.*]]
-; CHECK:       [[PRED_SDIV_IF15]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE16]]
-; CHECK:       [[PRED_SDIV_CONTINUE16]]:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_SDIV_IF17:.*]], label %[[PRED_SDIV_CONTINUE18:.*]]
-; CHECK:       [[PRED_SDIV_IF17]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE18]]
-; CHECK:       [[PRED_SDIV_CONTINUE18]]:
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
-; CHECK-NEXT:    br i1 [[TMP11]], label %[[PRED_SDIV_IF19:.*]], label %[[PRED_SDIV_CONTINUE20:.*]]
-; CHECK:       [[PRED_SDIV_IF19]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE20]]
-; CHECK:       [[PRED_SDIV_CONTINUE20]]:
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
-; CHECK-NEXT:    br i1 [[TMP12]], label %[[PRED_SDIV_IF21:.*]], label %[[PRED_SDIV_CONTINUE22:.*]]
-; CHECK:       [[PRED_SDIV_IF21]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE22]]
-; CHECK:       [[PRED_SDIV_CONTINUE22]]:
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
-; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_SDIV_IF23:.*]], label %[[PRED_SDIV_CONTINUE24:.*]]
-; CHECK:       [[PRED_SDIV_IF23]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE24]]
-; CHECK:       [[PRED_SDIV_CONTINUE24]]:
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
-; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_SDIV_IF25:.*]], label %[[PRED_SDIV_CONTINUE26:.*]]
-; CHECK:       [[PRED_SDIV_IF25]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE26]]
-; CHECK:       [[PRED_SDIV_CONTINUE26]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
-; CHECK-NEXT:    br i1 [[TMP15]], label %[[PRED_SDIV_IF27:.*]], label %[[PRED_SDIV_CONTINUE28:.*]]
-; CHECK:       [[PRED_SDIV_IF27]]:
-; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE28]]
+; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE28:.*]]
 ; CHECK:       [[PRED_SDIV_CONTINUE28]]:
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
-; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_SDIV_IF29:.*]], label %[[PRED_SDIV_CONTINUE30]]
+; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[PRED_SDIV_CONTINUE30:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br i1 [[COND]], label %[[PRED_SDIV_CONTINUE30]], label %[[PRED_SDIV_IF29:.*]]
 ; CHECK:       [[PRED_SDIV_IF29]]:
+; CHECK-NEXT:    [[SDIV:%.*]] = sdiv i16 [[SEXT]], [[ZEXT_TRUE]]
+; CHECK-NEXT:    [[SDIV_TRUNC:%.*]] = trunc i16 [[SDIV]] to i8
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE30]]
 ; CHECK:       [[PRED_SDIV_CONTINUE30]]:
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[COND]], i8 0, i8 [[V]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = phi i8 [ [[SDIV_TRUNC]], %[[PRED_SDIV_IF29]] ], [ 0, %[[PRED_SDIV_CONTINUE28]] ]
 ; CHECK-NEXT:    store i8 [[PREDPHI]], ptr [[P]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
-; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[COND_FALSE]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br label %[[EXIT1:.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i8 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT1:.*]], label %[[PRED_SDIV_CONTINUE28]]
 ; CHECK:       [[EXIT1]]:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index d8780c4836f4f..76f30decf81e7 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -696,7 +696,6 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true))
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
 ; CHECK:       [[VEC_EPILOG_ITER_CHECK]]:
@@ -725,7 +724,6 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true))
 ; CHECK-NEXT:    [[CMP_N8:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[CMP_N8]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       [[VEC_EPILOG_SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll b/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll
index f2d6834c91d53..5264bb59fafe3 100644
--- a/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll
@@ -10,21 +10,8 @@ define void @pr154045(ptr %p, i1 %c, i64 %x) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
-; CHECK:       [[PRED_SREM_IF]]:
-; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE]]
-; CHECK:       [[PRED_SREM_CONTINUE]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
-; CHECK:       [[PRED_SREM_IF1]]:
-; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE2]]
-; CHECK:       [[PRED_SREM_CONTINUE2]]:
 ; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
 ; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll
index 92af82868ad1e..d251248d6fba1 100644
--- a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll
@@ -12,8 +12,8 @@ define void @neg_cond(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 42)
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> splat (i32 42), <4 x i32> splat (i32 43)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 42)
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 43), <4 x i32> splat (i32 42)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024

>From 220bf80c60ba72ac37e32ae36d93bbf79187bab9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 11 Feb 2026 14:04:30 +0000
Subject: [PATCH 2/4] !fixup address comments, thanks

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp    | 12 ++++++------
 .../X86/drop-poison-generating-flags.ll              |  4 +---
 llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll  |  4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8a744630c1def..69800e2750a12 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1220,9 +1220,12 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
 
   VPBuilder Builder(Def);
 
-  // Avoid replacing VPInstructions with underlying value with new
+  // Avoid replacing VPInstructions with underlying values with new
   // VPInstructions. VPInstructions with underlying values should get
   // widened/replicated later.
+  // TODO: We should also not replace non-VPInstructions like VPWidenRecipe with
+  // VPInstructions without underlying values, as those will get skipped during
+  // cost computation.
   bool CanCreateNewRecipe =
       !isa<VPInstruction>(Def) || !Def->getUnderlyingValue();
 
@@ -3400,11 +3403,8 @@ void VPlanTransforms::replaceSymbolicStrides(
     auto *R = cast<VPRecipeBase>(&U);
     if (R->getRegion())
       return true;
-    // For VPlan0 (no VectorLoopRegion yet), allow replacement in the loop body.
-    auto *VectorLoopRegion = Plan.getVectorLoopRegion();
-    if (!VectorLoopRegion)
-      return !isa<VPIRBasicBlock>(R->getParent());
-    return R->getParent() == VectorLoopRegion->getSinglePredecessor();
+    return R->getRegion() ||
+           R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
   };
   ValueToSCEVMapTy RewriteMap;
   for (const SCEV *Stride : StridesMap.values()) {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 4423b89e981b9..f20bf95af4b58 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -747,9 +747,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
 ; CHECK:       [[PRED_LOAD_CONTINUE6]]:
 ; CHECK-NEXT:    [[TMP26:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP26]], <4 x i8> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> poison, <4 x i64> zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3
-; CHECK-NEXT:    store i64 [[TMP12]], ptr [[AUX]], align 8
+; CHECK-NEXT:    store i64 0, ptr [[AUX]], align 8
 ; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[DST]], align 4
 ; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index becf295fb996b..23e1a6d9b81fc 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -32,8 +32,8 @@ define i32 @more_than_one_use(ptr %a, i64 %n) {
 ; CHECK-NEXT:      ir<%i> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0]]>
 ; CHECK-NEXT:      WIDEN-REDUCTION-PHI ir<%r> = phi vp<[[VP4]]>, ir<%tmp3>
 ; CHECK-NEXT:      WIDEN ir<%i.next> = add nuw nsw ir<%i>, ir<1>
-; CHECK-NEXT:      WIDEN ir<%cond> = icmp sge ir<%i.next>, ir<%n>
-; CHECK-NEXT:      WIDEN ir<%tmp0> = select ir<%cond>, ir<0>, ir<%i.next>
+; CHECK-NEXT:      WIDEN ir<%cond> = icmp slt ir<%i.next>, ir<%n>
+; CHECK-NEXT:      WIDEN ir<%tmp0> = select ir<%cond>, ir<%i.next>, ir<0>
 ; CHECK-NEXT:      REPLICATE ir<%tmp1> = getelementptr inbounds ir<%a>, ir<%tmp0>
 ; CHECK-NEXT:      REPLICATE ir<%tmp2> = load ir<%tmp1>
 ; CHECK-NEXT:      WIDEN ir<%tmp3> = add ir<%r>, ir<%tmp2>

>From 36ee2b80ee339b2609464a14dac723085ec13181 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 11 Feb 2026 14:11:46 +0000
Subject: [PATCH 3/4] !fixup fix formatting

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 69800e2750a12..e8e91a074e106 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1314,7 +1314,7 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
 
   // (x && y) | (x && z) -> x && (y | z)
   if (CanCreateNewRecipe &&
-match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+      match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
                               m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) &&
       // Simplify only if one of the operands has one use to avoid creating an
       // extra recipe.

>From 7b4986173afcac9d01d4eedb4cfd3bd0e1c73783 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 11 Feb 2026 16:53:24 +0000
Subject: [PATCH 4/4] !fixup adjust comment, remove more leftover

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e8e91a074e106..7eb4bc5a78ad7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1221,8 +1221,10 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
   VPBuilder Builder(Def);
 
   // Avoid replacing VPInstructions with underlying values with new
-  // VPInstructions. VPInstructions with underlying values should get
-  // widened/replicated later.
+  // VPInstructions, as we would fail to create widen/replicate recpes from the
+  // new VPInstructions without an underlying value, and miss out on some
+  // transformations that only apply to widened/replicated recipes later, by
+  // doing so.
   // TODO: We should also not replace non-VPInstructions like VPWidenRecipe with
   // VPInstructions without underlying values, as those will get skipped during
   // cost computation.
@@ -3401,8 +3403,6 @@ void VPlanTransforms::replaceSymbolicStrides(
   // evolution.
   auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
     auto *R = cast<VPRecipeBase>(&U);
-    if (R->getRegion())
-      return true;
     return R->getRegion() ||
            R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
   };



More information about the llvm-commits mailing list