[llvm] [LV] Remove EVLIndVarSimplify pass (PR #160454)

Wed Sep 24 00:27:56 PDT 2025

https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/160454

Initially this pass was needed to replace the fixed-step canonical IV with the variable-step EVL IV, but this was eventually superseded by the loop vectorizer doing this transform itself in #147222. The pass was then removed from the RISC-V pipeline in #151483 and the loop vectorizer stopped emitting the metadata used by the pass in #155760, so now there are no users of it.


>From d03e9079238bf09c62ad99007062ac33da856546 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 24 Sep 2025 15:23:27 +0800
Subject: [PATCH] [LV] Remove EVLIndVarSimplify pass

Initially this pass was needed to replace the fixed-step canonical IV with the variable-step EVL IV, but this was eventually superseded by the loop vectorizer doing this transform itself in #147222. The pass was then removed from the RISC-V pipeline in #151483 and the loop vectorizer stopped emitting the metadata used by the pass in #155760, so now there are no users of it.
---
 .../Transforms/Vectorize/EVLIndVarSimplify.h  |  31 --
 llvm/lib/Passes/PassBuilder.cpp               |   1 -
 llvm/lib/Passes/PassRegistry.def              |   1 -
 llvm/lib/Transforms/Vectorize/CMakeLists.txt  |   1 -
 .../Vectorize/EVLIndVarSimplify.cpp           | 300 ----------------
 .../LoopVectorize/RISCV/evl-iv-simplify.ll    | 333 ------------------
 6 files changed, 667 deletions(-)
 delete mode 100644 llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h
 delete mode 100644 llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp
 delete mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll

diff --git a/llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h b/llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h
deleted file mode 100644
index 3178dc762a195..0000000000000
--- a/llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===------ EVLIndVarSimplify.h - Optimize vectorized loops w/ EVL IV------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass optimizes a vectorized loop with canonical IV to using EVL-based
-// IV if it was tail-folded by predicated EVL.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_VECTORIZE_EVLINDVARSIMPLIFY_H
-#define LLVM_TRANSFORMS_VECTORIZE_EVLINDVARSIMPLIFY_H
-
-#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-class Loop;
-class LPMUpdater;
-
-/// Turn vectorized loops with canonical induction variables into loops that
-/// only use a single EVL-based induction variable.
-struct EVLIndVarSimplifyPass : public PassInfoMixin<EVLIndVarSimplifyPass> {
-  PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM,
-                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
-};
-} // namespace llvm
-#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index e4dab4acc0b4a..f84a16bd97224 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -375,7 +375,6 @@
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include "llvm/Transforms/Utils/UnifyLoopExits.h"
-#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h"
 #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
 #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 49d5d08474f0f..f0e7d36f78aab 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -755,7 +755,6 @@ LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
 #endif
 LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
 LOOP_PASS("dot-ddg", DDGDotPrinterPass())
-LOOP_PASS("evl-iv-simplify", EVLIndVarSimplifyPass())
 LOOP_PASS("guard-widening", GuardWideningPass())
 LOOP_PASS("extra-simple-loop-unswitch-passes",
           ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch>())
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 96670fe3ea195..9f4a242214471 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_llvm_component_library(LLVMVectorize
-  EVLIndVarSimplify.cpp
   LoadStoreVectorizer.cpp
   LoopIdiomVectorize.cpp
   LoopVectorizationLegality.cpp
diff --git a/llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp b/llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp
deleted file mode 100644
index 5dd689799b828..0000000000000
--- a/llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp
+++ /dev/null
@@ -1,300 +0,0 @@
-//===---- EVLIndVarSimplify.cpp - Optimize vectorized loops w/ EVL IV------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass optimizes a vectorized loop with canonical IV to using EVL-based
-// IV if it was tail-folded by predicated EVL.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils/Local.h"
-
-#define DEBUG_TYPE "evl-iv-simplify"
-
-using namespace llvm;
-
-STATISTIC(NumEliminatedCanonicalIV, "Number of canonical IVs we eliminated");
-
-static cl::opt<bool> EnableEVLIndVarSimplify(
-    "enable-evl-indvar-simplify",
-    cl::desc("Enable EVL-based induction variable simplify Pass"), cl::Hidden,
-    cl::init(true));
-
-namespace {
-struct EVLIndVarSimplifyImpl {
-  ScalarEvolution &SE;
-  OptimizationRemarkEmitter *ORE = nullptr;
-
-  EVLIndVarSimplifyImpl(LoopStandardAnalysisResults &LAR,
-                        OptimizationRemarkEmitter *ORE)
-      : SE(LAR.SE), ORE(ORE) {}
-
-  /// Returns true if modify the loop.
-  bool run(Loop &L);
-};
-} // anonymous namespace
-
-/// Returns the constant part of vectorization factor from the induction
-/// variable's step value SCEV expression.
-static uint32_t getVFFromIndVar(const SCEV *Step, const Function &F) {
-  if (!Step)
-    return 0U;
-
-  // Looking for loops with IV step value in the form of `(<constant VF> x
-  // vscale)`.
-  if (const auto *Mul = dyn_cast<SCEVMulExpr>(Step)) {
-    if (Mul->getNumOperands() == 2) {
-      const SCEV *LHS = Mul->getOperand(0);
-      const SCEV *RHS = Mul->getOperand(1);
-      if (const auto *Const = dyn_cast<SCEVConstant>(LHS);
-          Const && isa<SCEVVScale>(RHS)) {
-        uint64_t V = Const->getAPInt().getLimitedValue();
-        if (llvm::isUInt<32>(V))
-          return V;
-      }
-    }
-  }
-
-  // If not, see if the vscale_range of the parent function is a fixed value,
-  // which makes the step value to be replaced by a constant.
-  if (F.hasFnAttribute(Attribute::VScaleRange))
-    if (const auto *ConstStep = dyn_cast<SCEVConstant>(Step)) {
-      APInt V = ConstStep->getAPInt().abs();
-      ConstantRange CR = llvm::getVScaleRange(&F, 64);
-      if (const APInt *Fixed = CR.getSingleElement()) {
-        V = V.zextOrTrunc(Fixed->getBitWidth());
-        uint64_t VF = V.udiv(*Fixed).getLimitedValue();
-        if (VF && llvm::isUInt<32>(VF) &&
-            // Make sure step is divisible by vscale.
-            V.urem(*Fixed).isZero())
-          return VF;
-      }
-    }
-
-  return 0U;
-}
-
-bool EVLIndVarSimplifyImpl::run(Loop &L) {
-  if (!EnableEVLIndVarSimplify)
-    return false;
-
-  if (!getBooleanLoopAttribute(&L, "llvm.loop.isvectorized"))
-    return false;
-  const MDOperand *EVLMD =
-      findStringMetadataForLoop(&L, "llvm.loop.isvectorized.tailfoldingstyle")
-          .value_or(nullptr);
-  if (!EVLMD || !EVLMD->equalsStr("evl"))
-    return false;
-
-  BasicBlock *LatchBlock = L.getLoopLatch();
-  ICmpInst *OrigLatchCmp = L.getLatchCmpInst();
-  if (!LatchBlock || !OrigLatchCmp)
-    return false;
-
-  InductionDescriptor IVD;
-  PHINode *IndVar = L.getInductionVariable(SE);
-  if (!IndVar || !L.getInductionDescriptor(SE, IVD)) {
-    const char *Reason = (IndVar ? "induction descriptor is not available"
-                                 : "cannot recognize induction variable");
-    LLVM_DEBUG(dbgs() << "Cannot retrieve IV from loop " << L.getName()
-                      << " because" << Reason << "\n");
-    if (ORE) {
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedIndVar",
-                                        L.getStartLoc(), L.getHeader())
-               << "Cannot retrieve IV because " << ore::NV("Reason", Reason);
-      });
-    }
-    return false;
-  }
-
-  BasicBlock *InitBlock, *BackEdgeBlock;
-  if (!L.getIncomingAndBackEdge(InitBlock, BackEdgeBlock)) {
-    LLVM_DEBUG(dbgs() << "Expect unique incoming and backedge in "
-                      << L.getName() << "\n");
-    if (ORE) {
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedLoopStructure",
-                                        L.getStartLoc(), L.getHeader())
-               << "Does not have a unique incoming and backedge";
-      });
-    }
-    return false;
-  }
-
-  // Retrieve the loop bounds.
-  std::optional<Loop::LoopBounds> Bounds = L.getBounds(SE);
-  if (!Bounds) {
-    LLVM_DEBUG(dbgs() << "Could not obtain the bounds for loop " << L.getName()
-                      << "\n");
-    if (ORE) {
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedLoopStructure",
-                                        L.getStartLoc(), L.getHeader())
-               << "Could not obtain the loop bounds";
-      });
-    }
-    return false;
-  }
-  Value *CanonicalIVInit = &Bounds->getInitialIVValue();
-  Value *CanonicalIVFinal = &Bounds->getFinalIVValue();
-
-  const SCEV *StepV = IVD.getStep();
-  uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent());
-  if (!VF) {
-    LLVM_DEBUG(dbgs() << "Could not infer VF from IndVar step '" << *StepV
-                      << "'\n");
-    if (ORE) {
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedIndVar",
-                                        L.getStartLoc(), L.getHeader())
-               << "Could not infer VF from IndVar step "
-               << ore::NV("Step", StepV);
-      });
-    }
-    return false;
-  }
-  LLVM_DEBUG(dbgs() << "Using VF=" << VF << " for loop " << L.getName()
-                    << "\n");
-
-  // Try to find the EVL-based induction variable.
-  using namespace PatternMatch;
-  BasicBlock *BB = IndVar->getParent();
-
-  Value *EVLIndVar = nullptr;
-  Value *RemTC = nullptr;
-  Value *TC = nullptr;
-  auto IntrinsicMatch = m_Intrinsic<Intrinsic::experimental_get_vector_length>(
-      m_Value(RemTC), m_SpecificInt(VF),
-      /*Scalable=*/m_SpecificInt(1));
-  for (PHINode &PN : BB->phis()) {
-    if (&PN == IndVar)
-      continue;
-
-    // Check 1: it has to contain both incoming (init) & backedge blocks
-    // from IndVar.
-    if (PN.getBasicBlockIndex(InitBlock) < 0 ||
-        PN.getBasicBlockIndex(BackEdgeBlock) < 0)
-      continue;
-    // Check 2: EVL index is always increasing, thus its inital value has to be
-    // equal to either the initial IV value (when the canonical IV is also
-    // increasing) or the last IV value (when canonical IV is decreasing).
-    Value *Init = PN.getIncomingValueForBlock(InitBlock);
-    using Direction = Loop::LoopBounds::Direction;
-    switch (Bounds->getDirection()) {
-    case Direction::Increasing:
-      if (Init != CanonicalIVInit)
-        continue;
-      break;
-    case Direction::Decreasing:
-      if (Init != CanonicalIVFinal)
-        continue;
-      break;
-    case Direction::Unknown:
-      // To be more permissive and see if either the initial or final IV value
-      // matches PN's init value.
-      if (Init != CanonicalIVInit && Init != CanonicalIVFinal)
-        continue;
-      break;
-    }
-    Value *RecValue = PN.getIncomingValueForBlock(BackEdgeBlock);
-    assert(RecValue && "expect recurrent IndVar value");
-
-    LLVM_DEBUG(dbgs() << "Found candidate PN of EVL-based IndVar: " << PN
-                      << "\n");
-
-    // Check 3: Pattern match to find the EVL-based index and total trip count
-    // (TC).
-    if (match(RecValue,
-              m_c_Add(m_ZExtOrSelf(IntrinsicMatch), m_Specific(&PN))) &&
-        match(RemTC, m_Sub(m_Value(TC), m_Specific(&PN)))) {
-      EVLIndVar = RecValue;
-      break;
-    }
-  }
-
-  if (!EVLIndVar || !TC)
-    return false;
-
-  LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
-  if (ORE) {
-    ORE->emit([&]() {
-      DebugLoc DL;
-      BasicBlock *Region = nullptr;
-      if (auto *I = dyn_cast<Instruction>(EVLIndVar)) {
-        DL = I->getDebugLoc();
-        Region = I->getParent();
-      } else {
-        DL = L.getStartLoc();
-        Region = L.getHeader();
-      }
-      return OptimizationRemark(DEBUG_TYPE, "UseEVLIndVar", DL, Region)
-             << "Using " << ore::NV("EVLIndVar", EVLIndVar)
-             << " for EVL-based IndVar";
-    });
-  }
-
-  // Create an EVL-based comparison and replace the branch to use it as
-  // predicate.
-
-  // Loop::getLatchCmpInst check at the beginning of this function has ensured
-  // that latch block ends in a conditional branch.
-  auto *LatchBranch = cast<BranchInst>(LatchBlock->getTerminator());
-  assert(LatchBranch->isConditional() &&
-         "expect the loop latch to be ended with a conditional branch");
-  ICmpInst::Predicate Pred;
-  if (LatchBranch->getSuccessor(0) == L.getHeader())
-    Pred = ICmpInst::ICMP_NE;
-  else
-    Pred = ICmpInst::ICMP_EQ;
-
-  IRBuilder<> Builder(OrigLatchCmp);
-  auto *NewLatchCmp = Builder.CreateICmp(Pred, EVLIndVar, TC);
-  OrigLatchCmp->replaceAllUsesWith(NewLatchCmp);
-
-  // llvm::RecursivelyDeleteDeadPHINode only deletes cycles whose values are
-  // not used outside the cycles. However, in this case the now-RAUW-ed
-  // OrigLatchCmp will be considered a use outside the cycle while in reality
-  // it's practically dead. Thus we need to remove it before calling
-  // RecursivelyDeleteDeadPHINode.
-  (void)RecursivelyDeleteTriviallyDeadInstructions(OrigLatchCmp);
-  if (llvm::RecursivelyDeleteDeadPHINode(IndVar))
-    LLVM_DEBUG(dbgs() << "Removed original IndVar\n");
-
-  ++NumEliminatedCanonicalIV;
-
-  return true;
-}
-
-PreservedAnalyses EVLIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &LAM,
-                                             LoopStandardAnalysisResults &AR,
-                                             LPMUpdater &U) {
-  Function &F = *L.getHeader()->getParent();
-  auto &FAMProxy = LAM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR);
-  OptimizationRemarkEmitter *ORE =
-      FAMProxy.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);
-
-  if (EVLIndVarSimplifyImpl(AR, ORE).run(L))
-    return PreservedAnalyses::allInSet<CFGAnalyses>();
-  return PreservedAnalyses::all();
-}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll
deleted file mode 100644
index 4de0e666149f3..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll
+++ /dev/null
@@ -1,333 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify)' < %s | FileCheck %s
-; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify),function(simplifycfg,dce)' < %s | FileCheck %s --check-prefix=LOOP-DEL
-
-define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %N) vscale_range(2, 1024) {
-; CHECK-LABEL: define void @simple(
-; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
-; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
-; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
-; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
-; CHECK-NEXT:    [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
-; CHECK-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
-; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT:    [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-;
-; LOOP-DEL-LABEL: define void @simple(
-; LOOP-DEL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; LOOP-DEL-NEXT:  entry:
-; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
-; LOOP-DEL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; LOOP-DEL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
-; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
-; LOOP-DEL:       vector.ph:
-; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
-; LOOP-DEL:       vector.body:
-; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; LOOP-DEL-NEXT:    [[TMP4:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
-; LOOP-DEL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP4]], i32 4, i1 true)
-; LOOP-DEL-NEXT:    [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
-; LOOP-DEL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
-; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
-; LOOP-DEL-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; LOOP-DEL-NEXT:    [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
-; LOOP-DEL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
-; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
-; LOOP-DEL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP11]], ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; LOOP-DEL-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
-; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
-; LOOP-DEL-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
-; LOOP-DEL-NEXT:    br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; LOOP-DEL:       for.body:
-; LOOP-DEL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; LOOP-DEL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; LOOP-DEL-NEXT:    [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; LOOP-DEL-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
-; LOOP-DEL-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
-; LOOP-DEL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; LOOP-DEL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; LOOP-DEL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; LOOP-DEL:       for.cond.cleanup:
-; LOOP-DEL-NEXT:    ret void
-;
-entry:
-  %0 = sub i64 -1, %N
-  %1 = call i64 @llvm.vscale.i64()
-  %2 = mul i64 %1, 4
-  %3 = icmp ult i64 %0, %2
-  br i1 %3, label %scalar.ph, label %vector.ph
-
-vector.ph:                                        ; preds = %entry
-  %4 = call i64 @llvm.vscale.i64()
-  %5 = mul i64 %4, 4
-  %6 = call i64 @llvm.vscale.i64()
-  %7 = mul i64 %6, 4
-  %8 = sub i64 %7, 1
-  %n.rnd.up = add i64 %N, %8
-  %n.mod.vf = urem i64 %n.rnd.up, %5
-  %n.vec = sub i64 %n.rnd.up, %n.mod.vf
-  %9 = call i64 @llvm.vscale.i64()
-  %10 = mul i64 %9, 4
-  br label %vector.body
-
-vector.body:                                      ; preds = %vector.body, %vector.ph
-  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
-  %11 = sub i64 %N, %evl.based.iv
-  %12 = call i32 @llvm.experimental.get.vector.length.i64(i64 %11, i32 4, i1 true)
-  %13 = add i64 %evl.based.iv, 0
-  %14 = getelementptr inbounds i32, ptr %b, i64 %13
-  %15 = getelementptr inbounds i32, ptr %14, i32 0
-  %vp.op.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %15, <vscale x 4 x i1> splat (i1 true), i32 %12)
-  %18 = add nsw <vscale x 4 x i32> %c, %vp.op.load
-  %19 = getelementptr inbounds i32, ptr %a, i64 %13
-  %20 = getelementptr inbounds i32, ptr %19, i32 0
-  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %18, ptr align 4 %20, <vscale x 4 x i1> splat (i1 true), i32 %12)
-  %21 = zext i32 %12 to i64
-  %index.evl.next = add i64 %21, %evl.based.iv
-  %index.next = add nuw i64 %index, %10
-  %22 = icmp eq i64 %index.next, %n.vec
-  br i1 %22, label %middle.block, label %vector.body, !llvm.loop !0
-
-middle.block:                                     ; preds = %vector.body
-  br i1 true, label %for.cond.cleanup, label %scalar.ph
-
-scalar.ph:                                        ; preds = %entry, %middle.block
-  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ]
-  br label %for.body
-
-for.body:                                         ; preds = %for.body, %scalar.ph
-  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
-  %23 = load i32, ptr %arrayidx, align 4
-  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
-  store i32 %23, ptr %arrayidx4, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, %N
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !3
-
-for.cond.cleanup:                                 ; preds = %middle.block, %for.body
-  ret void
-}
-
-; Fixed IV steps resulting from vscale_range with a single element
-
-define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
-; CHECK-LABEL: define void @fixed_iv_step(
-; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
-; CHECK-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]]
-; CHECK:       for.end.loopexit5:
-; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-; LOOP-DEL-LABEL: define void @fixed_iv_step(
-; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
-; LOOP-DEL-NEXT:  entry:
-; LOOP-DEL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
-; LOOP-DEL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
-; LOOP-DEL:       vector.body:
-; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
-; LOOP-DEL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
-; LOOP-DEL-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
-; LOOP-DEL-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
-; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
-; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
-; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]]
-; LOOP-DEL:       for.end:
-; LOOP-DEL-NEXT:    ret void
-;
-entry:
-  br label %vector.ph
-
-vector.ph:
-  %n.rnd.up = add nsw i64 %N, 15
-  %n.vec = and i64 %n.rnd.up, -16
-  %broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
-  %broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-  br label %vector.body
-
-vector.body:
-  %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
-  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
-  %41 = sub i64 %N, %evl.based.iv
-  %42 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %41, i32 2, i1 true)
-  %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
-  tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %42)
-  %43 = zext i32 %42 to i64
-  %index.evl.next = add i64 %evl.based.iv, %43
-  %lsr.iv.next33 = add i64 %lsr.iv32, -16
-  %44 = icmp eq i64 %lsr.iv.next33, 0
-  br i1 %44, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
-
-for.end.loopexit5:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-; Fixed IV step and trip count
-define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
-; CHECK-LABEL: define void @fixed_iv_step_tc(
-; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
-; CHECK-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87
-; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]]
-; CHECK:       for.end.loopexit5:
-; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-; LOOP-DEL-LABEL: define void @fixed_iv_step_tc(
-; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
-; LOOP-DEL-NEXT:  entry:
-; LOOP-DEL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
-; LOOP-DEL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
-; LOOP-DEL:       vector.body:
-; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
-; LOOP-DEL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
-; LOOP-DEL-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
-; LOOP-DEL-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
-; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
-; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87
-; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]]
-; LOOP-DEL:       for.end:
-; LOOP-DEL-NEXT:    ret void
-;
-entry:
-  br label %vector.ph
-
-vector.ph:
-  %n.rnd.up = add nsw i64 87, 15
-  %n.vec = and i64 %n.rnd.up, -16
-  %broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
-  %broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-  br label %vector.body
-
-vector.body:
-  %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
-  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
-  %41 = sub i64 87, %evl.based.iv
-  %42 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %41, i32 2, i1 true)
-  %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
-  tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %42)
-  %43 = zext i32 %42 to i64
-  %index.evl.next = add i64 %evl.based.iv, %43
-  %lsr.iv.next33 = add i64 %lsr.iv32, -16
-  %44 = icmp eq i64 %lsr.iv.next33, 0
-  br i1 %44, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
-
-for.end.loopexit5:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-declare i64 @llvm.vscale.i64()
-
-declare i32 @llvm.experimental.get.vector.length.i64(i64, i32 immarg, i1 immarg)
-
-declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr nocapture, <vscale x 4 x i1>, i32)
-
-declare void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32>, ptr nocapture, <vscale x 4 x i1>, i32)
-
-attributes #0 = { vscale_range(8,8) }
-
-!0 = distinct !{!0, !1, !2, !4}
-!1 = !{!"llvm.loop.isvectorized", i32 1}
-!2 = !{!"llvm.loop.unroll.runtime.disable"}
-!3 = distinct !{!3, !2, !1, !4}
-!4 = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[META3]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]], [[META3]]}
-;.
-; LOOP-DEL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; LOOP-DEL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; LOOP-DEL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; LOOP-DEL: [[META3]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
-; LOOP-DEL: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]], [[META3]]}
-;.