[llvm] 37e309f - [AArch64][LoopIdiom] Generalize AArch64LoopIdiomTransform into LoopIdiomVectorize (#94081)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 7 14:06:15 PDT 2024
Author: Min-Yih Hsu
Date: 2024-06-07T14:06:11-07:00
New Revision: 37e309f1635404bfca029c3712ee227a892cd4cf
URL: https://github.com/llvm/llvm-project/commit/37e309f1635404bfca029c3712ee227a892cd4cf
DIFF: https://github.com/llvm/llvm-project/commit/37e309f1635404bfca029c3712ee227a892cd4cf.diff
LOG: [AArch64][LoopIdiom] Generalize AArch64LoopIdiomTransform into LoopIdiomVectorize (#94081)
To facilitate sharing LoopIdiomTransform between AArch64 and RISC-V,
this first patch moves AArch64LoopIdiomTransform from lib/Target/AArch64
to lib/Transforms/Vectorize and renames it to LoopIdiomVectorize. The
following patch (#94082) will teach LoopIdiomVectorize how to generate VP
intrinsics (in addition to the current masked vector style) in favor of
RVV.
Added:
llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
Modified:
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassRegistry.def
llvm/lib/Target/AArch64/AArch64.h
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/AArch64TargetMachine.h
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/lib/Transforms/Vectorize/CMakeLists.txt
llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
Removed:
llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h
llvm/lib/Target/AArch64/AArch64PassRegistry.def
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h b/llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
similarity index 60%
rename from llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h
rename to llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
index cc68425bb68b5..56f44b7dc6b2a 100644
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
@@ -1,4 +1,4 @@
-//===- AArch64LoopIdiomTransform.h --------------------------------------===//
+//===----------LoopIdiomVectorize.h -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,20 +6,16 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H
+#ifndef LLVM_LIB_TRANSFORMS_VECTORIZE_LOOPIDIOMVECTORIZE_H
+#define LLVM_LIB_TRANSFORMS_VECTORIZE_LOOPIDIOMVECTORIZE_H
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
-
-struct AArch64LoopIdiomTransformPass
- : PassInfoMixin<AArch64LoopIdiomTransformPass> {
+struct LoopIdiomVectorizePass : PassInfoMixin<LoopIdiomVectorizePass> {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
-
} // namespace llvm
-
-#endif // LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H
+#endif // LLVM_LIB_TRANSFORMS_VECTORIZE_LOOPIDIOMVECTORIZE_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 2c56b04a1d9c8..19e8a8ab68a73 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -299,6 +299,7 @@
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/Transforms/Vectorize/VectorCombine.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index dad97146a9f60..60c517790bcab 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -622,6 +622,7 @@ LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
+LOOP_PASS("loop-idiom-vectorize", LoopIdiomVectorizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 0f0a22ec82936..6f2aeb83a451a 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -90,7 +90,6 @@ void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry &);
void initializeAArch64GlobalsTaggingPass(PassRegistry &);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
-void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64PassRegistry.def b/llvm/lib/Target/AArch64/AArch64PassRegistry.def
deleted file mode 100644
index ca944579f93a9..0000000000000
--- a/llvm/lib/Target/AArch64/AArch64PassRegistry.def
+++ /dev/null
@@ -1,20 +0,0 @@
-//===- AArch64PassRegistry.def - Registry of AArch64 passes -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is used as the registry of passes that are part of the
-// AArch64 backend.
-//
-//===----------------------------------------------------------------------===//
-
-// NOTE: NO INCLUDE GUARD DESIRED!
-
-#ifndef LOOP_PASS
-#define LOOP_PASS(NAME, CREATE_PASS)
-#endif
-LOOP_PASS("aarch64-lit", AArch64LoopIdiomTransformPass())
-#undef LOOP_PASS
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 30f0ceaf674c6..7de9071476e7f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -11,7 +11,6 @@
#include "AArch64TargetMachine.h"
#include "AArch64.h"
-#include "AArch64LoopIdiomTransform.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64MachineScheduler.h"
#include "AArch64MacroFusion.h"
@@ -52,6 +51,7 @@
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
#include <memory>
#include <optional>
#include <string>
@@ -234,7 +234,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
- initializeAArch64LoopIdiomTransformLegacyPassPass(*PR);
initializeAArch64MIPeepholeOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64O0PreLegalizerCombinerPass(*PR);
@@ -553,12 +552,9 @@ class AArch64PassConfig : public TargetPassConfig {
void AArch64TargetMachine::registerPassBuilderCallbacks(
PassBuilder &PB, bool PopulateClassToPassNames) {
-#define GET_PASS_REGISTRY "AArch64PassRegistry.def"
-#include "llvm/Passes/TargetPassRegistry.inc"
-
PB.registerLateLoopOptimizationsEPCallback(
[=](LoopPassManager &LPM, OptimizationLevel Level) {
- LPM.addPass(AArch64LoopIdiomTransformPass());
+ LPM.addPass(LoopIdiomVectorizePass());
});
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 8fb68b06f1378..e396d9204716a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -14,7 +14,6 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
#include "AArch64InstrInfo.h"
-#include "AArch64LoopIdiomTransform.h"
#include "AArch64Subtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 8e76f6c9279e7..639bc0707dff2 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -65,7 +65,6 @@ add_llvm_target(AArch64CodeGen
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
AArch64LoadStoreOptimizer.cpp
- AArch64LoopIdiomTransform.cpp
AArch64LowerHomogeneousPrologEpilog.cpp
AArch64MachineFunctionInfo.cpp
AArch64MachineScheduler.cpp
@@ -112,6 +111,7 @@ add_llvm_target(AArch64CodeGen
Target
TargetParser
TransformUtils
+ Vectorize
ADD_TO_COMPONENT
AArch64
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 9674094024b9e..4caec07c5ac43 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_component_library(LLVMVectorize
LoadStoreVectorizer.cpp
+ LoopIdiomVectorize.cpp
LoopVectorizationLegality.cpp
LoopVectorize.cpp
SLPVectorizer.cpp
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
similarity index 73%
rename from llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
rename to llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index 8ae3f014d45e0..38095b1433ebe 100644
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -1,4 +1,4 @@
-//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition -------------===//
+//===-------- LoopIdiomVectorize.cpp - Loop idiom vectorization -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -26,6 +26,10 @@
//
//===----------------------------------------------------------------------===//
//
+// NOTE: This Pass matches a really specific loop pattern because it's only
+// supposed to be a temporary solution until our LoopVectorizer is powerful
+// enough to vectorize it automatically.
+//
// TODO List:
//
// * Add support for the inverse case where we scan for a matching element.
@@ -35,7 +39,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -44,37 +48,30 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "aarch64-loop-idiom-transform"
-
-static cl::opt<bool>
- DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
- cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
-
-static cl::opt<bool> DisableByteCmp(
- "disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
- cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
- "not convert byte-compare loop(s)."));
+#define DEBUG_TYPE "loop-idiom-vectorize"
-static cl::opt<bool> VerifyLoops(
- "aarch64-lit-verify", cl::Hidden, cl::init(false),
- cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+static cl::opt<bool> DisableAll("disable-loop-idiom-vectorize-all", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable Loop Idiom Vectorize Pass."));
-namespace llvm {
-
-void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
-Pass *createAArch64LoopIdiomTransformPass();
+static cl::opt<bool>
+ DisableByteCmp("disable-loop-idiom-vectorize-bytecmp", cl::Hidden,
+ cl::init(false),
+ cl::desc("Proceed with Loop Idiom Vectorize Pass, but do "
+ "not convert byte-compare loop(s)."));
-} // end namespace llvm
+static cl::opt<bool>
+ VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false),
+ cl::desc("Verify loops generated Loop Idiom Vectorize Pass."));
namespace {
-class AArch64LoopIdiomTransform {
+class LoopIdiomVectorize {
Loop *CurLoop = nullptr;
DominatorTree *DT;
LoopInfo *LI;
@@ -82,9 +79,9 @@ class AArch64LoopIdiomTransform {
const DataLayout *DL;
public:
- explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
- const TargetTransformInfo *TTI,
- const DataLayout *DL)
+ explicit LoopIdiomVectorize(DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
: DT(DT), LI(LI), TTI(TTI), DL(DL) {}
bool run(Loop *L);
@@ -107,74 +104,17 @@ class AArch64LoopIdiomTransform {
BasicBlock *EndBB);
/// @}
};
+} // anonymous namespace
-class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
-public:
- static char ID;
-
- explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
- initializeAArch64LoopIdiomTransformLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override {
- return "Transform AArch64-specific loop idioms";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-};
-
-bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
- LPPassManager &LPM) {
-
- if (skipLoop(L))
- return false;
-
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent());
- return AArch64LoopIdiomTransform(
- DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout())
- .run(L);
-}
-
-} // end anonymous namespace
-
-char AArch64LoopIdiomTransformLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(
- AArch64LoopIdiomTransformLegacyPass, "aarch64-lit",
- "Transform specific loop idioms into optimized vector forms", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(
- AArch64LoopIdiomTransformLegacyPass, "aarch64-lit",
- "Transform specific loop idioms into optimized vector forms", false, false)
-
-Pass *llvm::createAArch64LoopIdiomTransformPass() {
- return new AArch64LoopIdiomTransformLegacyPass();
-}
-
-PreservedAnalyses
-AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
+PreservedAnalyses LoopIdiomVectorizePass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &) {
if (DisableAll)
return PreservedAnalyses::all();
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
- AArch64LoopIdiomTransform LIT(&AR.DT, &AR.LI, &AR.TTI, DL);
+ LoopIdiomVectorize LIT(&AR.DT, &AR.LI, &AR.TTI, DL);
if (!LIT.run(&L))
return PreservedAnalyses::all();
@@ -183,11 +123,11 @@ AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM,
//===----------------------------------------------------------------------===//
//
-// Implementation of AArch64LoopIdiomTransform
+// Implementation of LoopIdiomVectorize
//
//===----------------------------------------------------------------------===//
-bool AArch64LoopIdiomTransform::run(Loop *L) {
+bool LoopIdiomVectorize::run(Loop *L) {
CurLoop = L;
Function &F = *L->getHeader()->getParent();
@@ -211,7 +151,7 @@ bool AArch64LoopIdiomTransform::run(Loop *L) {
return recognizeByteCompare();
}
-bool AArch64LoopIdiomTransform::recognizeByteCompare() {
+bool LoopIdiomVectorize::recognizeByteCompare() {
// Currently the transformation only works on scalable vector types, although
// there is no fundamental reason why it cannot be made to work for fixed
// width too.
@@ -224,7 +164,7 @@ bool AArch64LoopIdiomTransform::recognizeByteCompare() {
BasicBlock *Header = CurLoop->getHeader();
- // In AArch64LoopIdiomTransform::run we have already checked that the loop
+ // In LoopIdiomVectorize::run we have already checked that the loop
// has a preheader so we can assume it's in a canonical form.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 2)
return false;
@@ -242,8 +182,7 @@ bool AArch64LoopIdiomTransform::recognizeByteCompare() {
// %cmp.not = icmp eq i32 %inc, %n
// br i1 %cmp.not, label %while.end, label %while.body
//
- auto CondBBInsts = LoopBlocks[0]->instructionsWithoutDebug();
- if (std::distance(CondBBInsts.begin(), CondBBInsts.end()) > 4)
+ if (LoopBlocks[0]->sizeWithoutDebug() > 4)
return false;
// The second block should contain 7 instructions, e.g.
@@ -257,8 +196,7 @@ bool AArch64LoopIdiomTransform::recognizeByteCompare() {
// %cmp.not.ld = icmp eq i8 %load.a, %load.b
// br i1 %cmp.not.ld, label %while.cond, label %while.end
//
- auto LoopBBInsts = LoopBlocks[1]->instructionsWithoutDebug();
- if (std::distance(LoopBBInsts.begin(), LoopBBInsts.end()) > 7)
+ if (LoopBlocks[1]->sizeWithoutDebug() > 7)
return false;
// The incoming value to the PHI node from the loop should be an add of 1.
@@ -393,7 +331,7 @@ bool AArch64LoopIdiomTransform::recognizeByteCompare() {
return true;
}
-Value *AArch64LoopIdiomTransform::expandFindMismatch(
+Value *LoopIdiomVectorize::expandFindMismatch(
IRBuilder<> &Builder, DomTreeUpdater &DTU, GetElementPtrInst *GEPA,
GetElementPtrInst *GEPB, Instruction *Index, Value *Start, Value *MaxLen) {
Value *PtrA = GEPA->getPointerOperand();
@@ -414,10 +352,10 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
// 1. A block for checking the zero-extended length exceeds 0
// 2. A block to check that the start and end addresses of a given array
// lie on the same page.
- // 3. The SVE loop preheader.
- // 4. The first SVE loop block.
- // 5. The SVE loop increment block.
- // 6. A block we can jump to from the SVE loop when a mismatch is found.
+ // 3. The vector loop preheader.
+ // 4. The first vector loop block.
+ // 5. The vector loop increment block.
+ // 6. A block we can jump to from the vector loop when a mismatch is found.
// 7. The first block of the scalar loop itself, containing PHIs , loads
// and cmp.
// 8. A scalar loop increment block to increment the PHIs and go back
@@ -432,17 +370,17 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
BasicBlock *MemCheckBlock = BasicBlock::Create(
Ctx, "mismatch_mem_check", EndBlock->getParent(), EndBlock);
- BasicBlock *SVELoopPreheaderBlock = BasicBlock::Create(
- Ctx, "mismatch_sve_loop_preheader", EndBlock->getParent(), EndBlock);
+ BasicBlock *VectorLoopPreheaderBlock = BasicBlock::Create(
+ Ctx, "mismatch_vec_loop_preheader", EndBlock->getParent(), EndBlock);
- BasicBlock *SVELoopStartBlock = BasicBlock::Create(
- Ctx, "mismatch_sve_loop", EndBlock->getParent(), EndBlock);
+ BasicBlock *VectorLoopStartBlock = BasicBlock::Create(
+ Ctx, "mismatch_vec_loop", EndBlock->getParent(), EndBlock);
- BasicBlock *SVELoopIncBlock = BasicBlock::Create(
- Ctx, "mismatch_sve_loop_inc", EndBlock->getParent(), EndBlock);
+ BasicBlock *VectorLoopIncBlock = BasicBlock::Create(
+ Ctx, "mismatch_vec_loop_inc", EndBlock->getParent(), EndBlock);
- BasicBlock *SVELoopMismatchBlock = BasicBlock::Create(
- Ctx, "mismatch_sve_loop_found", EndBlock->getParent(), EndBlock);
+ BasicBlock *VectorLoopMismatchBlock = BasicBlock::Create(
+ Ctx, "mismatch_vec_loop_found", EndBlock->getParent(), EndBlock);
BasicBlock *LoopPreHeaderBlock = BasicBlock::Create(
Ctx, "mismatch_loop_pre", EndBlock->getParent(), EndBlock);
@@ -456,26 +394,27 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
DTU.applyUpdates({{DominatorTree::Insert, Preheader, MinItCheckBlock},
{DominatorTree::Delete, Preheader, EndBlock}});
- // Update LoopInfo with the new SVE & scalar loops.
- auto SVELoop = LI->AllocateLoop();
+ // Update LoopInfo with the new vector & scalar loops.
+ auto VectorLoop = LI->AllocateLoop();
auto ScalarLoop = LI->AllocateLoop();
if (CurLoop->getParentLoop()) {
CurLoop->getParentLoop()->addBasicBlockToLoop(MinItCheckBlock, *LI);
CurLoop->getParentLoop()->addBasicBlockToLoop(MemCheckBlock, *LI);
- CurLoop->getParentLoop()->addBasicBlockToLoop(SVELoopPreheaderBlock, *LI);
- CurLoop->getParentLoop()->addChildLoop(SVELoop);
- CurLoop->getParentLoop()->addBasicBlockToLoop(SVELoopMismatchBlock, *LI);
+ CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopPreheaderBlock,
+ *LI);
+ CurLoop->getParentLoop()->addChildLoop(VectorLoop);
+ CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopMismatchBlock, *LI);
CurLoop->getParentLoop()->addBasicBlockToLoop(LoopPreHeaderBlock, *LI);
CurLoop->getParentLoop()->addChildLoop(ScalarLoop);
} else {
- LI->addTopLevelLoop(SVELoop);
+ LI->addTopLevelLoop(VectorLoop);
LI->addTopLevelLoop(ScalarLoop);
}
// Add the new basic blocks to their associated loops.
- SVELoop->addBasicBlockToLoop(SVELoopStartBlock, *LI);
- SVELoop->addBasicBlockToLoop(SVELoopIncBlock, *LI);
+ VectorLoop->addBasicBlockToLoop(VectorLoopStartBlock, *LI);
+ VectorLoop->addBasicBlockToLoop(VectorLoopIncBlock, *LI);
ScalarLoop->addBasicBlockToLoop(LoopStartBlock, *LI);
ScalarLoop->addBasicBlockToLoop(LoopIncBlock, *LI);
@@ -537,7 +476,7 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
Value *CombinedPageCmp = Builder.CreateOr(LhsPageCmp, RhsPageCmp);
BranchInst *CombinedPageCmpCmpBr = BranchInst::Create(
- LoopPreHeaderBlock, SVELoopPreheaderBlock, CombinedPageCmp);
+ LoopPreHeaderBlock, VectorLoopPreheaderBlock, CombinedPageCmp);
CombinedPageCmpCmpBr->setMetadata(
LLVMContext::MD_prof, MDBuilder(CombinedPageCmpCmpBr->getContext())
.createBranchWeights(10, 90));
@@ -545,12 +484,12 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
DTU.applyUpdates(
{{DominatorTree::Insert, MemCheckBlock, LoopPreHeaderBlock},
- {DominatorTree::Insert, MemCheckBlock, SVELoopPreheaderBlock}});
+ {DominatorTree::Insert, MemCheckBlock, VectorLoopPreheaderBlock}});
- // Set up the SVE loop preheader, i.e. calculate initial loop predicate,
+ // Set up the vector loop preheader, i.e. calculate initial loop predicate,
// zero-extend MaxLen to 64-bits, determine the number of vector elements
// processed in each iteration, etc.
- Builder.SetInsertPoint(SVELoopPreheaderBlock);
+ Builder.SetInsertPoint(VectorLoopPreheaderBlock);
// At this point we know two things must be true:
// 1. Start <= End
@@ -570,88 +509,91 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
Value *PFalse = Builder.CreateVectorSplat(PredVTy->getElementCount(),
Builder.getInt1(false));
- BranchInst *JumpToSVELoop = BranchInst::Create(SVELoopStartBlock);
- Builder.Insert(JumpToSVELoop);
+ BranchInst *JumpToVectorLoop = BranchInst::Create(VectorLoopStartBlock);
+ Builder.Insert(JumpToVectorLoop);
- DTU.applyUpdates(
- {{DominatorTree::Insert, SVELoopPreheaderBlock, SVELoopStartBlock}});
+ DTU.applyUpdates({{DominatorTree::Insert, VectorLoopPreheaderBlock,
+ VectorLoopStartBlock}});
- // Set up the first SVE loop block by creating the PHIs, doing the vector
+ // Set up the first vector loop block by creating the PHIs, doing the vector
// loads and comparing the vectors.
- Builder.SetInsertPoint(SVELoopStartBlock);
- PHINode *LoopPred = Builder.CreatePHI(PredVTy, 2, "mismatch_sve_loop_pred");
- LoopPred->addIncoming(InitialPred, SVELoopPreheaderBlock);
- PHINode *SVEIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_sve_index");
- SVEIndexPhi->addIncoming(ExtStart, SVELoopPreheaderBlock);
- Type *SVELoadType = ScalableVectorType::get(Builder.getInt8Ty(), 16);
- Value *Passthru = ConstantInt::getNullValue(SVELoadType);
-
- Value *SVELhsGep =
- Builder.CreateGEP(LoadType, PtrA, SVEIndexPhi, "", GEPA->isInBounds());
- Value *SVELhsLoad = Builder.CreateMaskedLoad(SVELoadType, SVELhsGep, Align(1),
- LoopPred, Passthru);
-
- Value *SVERhsGep =
- Builder.CreateGEP(LoadType, PtrB, SVEIndexPhi, "", GEPB->isInBounds());
- Value *SVERhsLoad = Builder.CreateMaskedLoad(SVELoadType, SVERhsGep, Align(1),
- LoopPred, Passthru);
-
- Value *SVEMatchCmp = Builder.CreateICmpNE(SVELhsLoad, SVERhsLoad);
- SVEMatchCmp = Builder.CreateSelect(LoopPred, SVEMatchCmp, PFalse);
- Value *SVEMatchHasActiveLanes = Builder.CreateOrReduce(SVEMatchCmp);
- BranchInst *SVEEarlyExit = BranchInst::Create(
- SVELoopMismatchBlock, SVELoopIncBlock, SVEMatchHasActiveLanes);
- Builder.Insert(SVEEarlyExit);
+ Builder.SetInsertPoint(VectorLoopStartBlock);
+ PHINode *LoopPred = Builder.CreatePHI(PredVTy, 2, "mismatch_vec_loop_pred");
+ LoopPred->addIncoming(InitialPred, VectorLoopPreheaderBlock);
+ PHINode *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vec_index");
+ VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);
+ Type *VectorLoadType = ScalableVectorType::get(Builder.getInt8Ty(), 16);
+ Value *Passthru = ConstantInt::getNullValue(VectorLoadType);
+
+ Value *VectorLhsGep =
+ Builder.CreateGEP(LoadType, PtrA, VectorIndexPhi, "", GEPA->isInBounds());
+ Value *VectorLhsLoad = Builder.CreateMaskedLoad(VectorLoadType, VectorLhsGep,
+ Align(1), LoopPred, Passthru);
+
+ Value *VectorRhsGep =
+ Builder.CreateGEP(LoadType, PtrB, VectorIndexPhi, "", GEPB->isInBounds());
+ Value *VectorRhsLoad = Builder.CreateMaskedLoad(VectorLoadType, VectorRhsGep,
+ Align(1), LoopPred, Passthru);
+
+ Value *VectorMatchCmp = Builder.CreateICmpNE(VectorLhsLoad, VectorRhsLoad);
+ VectorMatchCmp = Builder.CreateSelect(LoopPred, VectorMatchCmp, PFalse);
+ Value *VectorMatchHasActiveLanes = Builder.CreateOrReduce(VectorMatchCmp);
+ BranchInst *VectorEarlyExit = BranchInst::Create(
+ VectorLoopMismatchBlock, VectorLoopIncBlock, VectorMatchHasActiveLanes);
+ Builder.Insert(VectorEarlyExit);
DTU.applyUpdates(
- {{DominatorTree::Insert, SVELoopStartBlock, SVELoopMismatchBlock},
- {DominatorTree::Insert, SVELoopStartBlock, SVELoopIncBlock}});
+ {{DominatorTree::Insert, VectorLoopStartBlock, VectorLoopMismatchBlock},
+ {DominatorTree::Insert, VectorLoopStartBlock, VectorLoopIncBlock}});
// Increment the index counter and calculate the predicate for the next
// iteration of the loop. We branch back to the start of the loop if there
// is at least one active lane.
- Builder.SetInsertPoint(SVELoopIncBlock);
- Value *NewSVEIndexPhi = Builder.CreateAdd(SVEIndexPhi, VecLen, "",
- /*HasNUW=*/true, /*HasNSW=*/true);
- SVEIndexPhi->addIncoming(NewSVEIndexPhi, SVELoopIncBlock);
+ Builder.SetInsertPoint(VectorLoopIncBlock);
+ Value *NewVectorIndexPhi =
+ Builder.CreateAdd(VectorIndexPhi, VecLen, "",
+ /*HasNUW=*/true, /*HasNSW=*/true);
+ VectorIndexPhi->addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
Value *NewPred =
Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
- {PredVTy, I64Type}, {NewSVEIndexPhi, ExtEnd});
- LoopPred->addIncoming(NewPred, SVELoopIncBlock);
+ {PredVTy, I64Type}, {NewVectorIndexPhi, ExtEnd});
+ LoopPred->addIncoming(NewPred, VectorLoopIncBlock);
Value *PredHasActiveLanes =
Builder.CreateExtractElement(NewPred, uint64_t(0));
- BranchInst *SVELoopBranchBack =
- BranchInst::Create(SVELoopStartBlock, EndBlock, PredHasActiveLanes);
- Builder.Insert(SVELoopBranchBack);
+ BranchInst *VectorLoopBranchBack =
+ BranchInst::Create(VectorLoopStartBlock, EndBlock, PredHasActiveLanes);
+ Builder.Insert(VectorLoopBranchBack);
- DTU.applyUpdates({{DominatorTree::Insert, SVELoopIncBlock, SVELoopStartBlock},
- {DominatorTree::Insert, SVELoopIncBlock, EndBlock}});
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, VectorLoopIncBlock, VectorLoopStartBlock},
+ {DominatorTree::Insert, VectorLoopIncBlock, EndBlock}});
// If we found a mismatch then we need to calculate which lane in the vector
// had a mismatch and add that on to the current loop index.
- Builder.SetInsertPoint(SVELoopMismatchBlock);
- PHINode *FoundPred = Builder.CreatePHI(PredVTy, 1, "mismatch_sve_found_pred");
- FoundPred->addIncoming(SVEMatchCmp, SVELoopStartBlock);
+ Builder.SetInsertPoint(VectorLoopMismatchBlock);
+ PHINode *FoundPred = Builder.CreatePHI(PredVTy, 1, "mismatch_vec_found_pred");
+ FoundPred->addIncoming(VectorMatchCmp, VectorLoopStartBlock);
PHINode *LastLoopPred =
- Builder.CreatePHI(PredVTy, 1, "mismatch_sve_last_loop_pred");
- LastLoopPred->addIncoming(LoopPred, SVELoopStartBlock);
- PHINode *SVEFoundIndex =
- Builder.CreatePHI(I64Type, 1, "mismatch_sve_found_index");
- SVEFoundIndex->addIncoming(SVEIndexPhi, SVELoopStartBlock);
+ Builder.CreatePHI(PredVTy, 1, "mismatch_vec_last_loop_pred");
+ LastLoopPred->addIncoming(LoopPred, VectorLoopStartBlock);
+ PHINode *VectorFoundIndex =
+ Builder.CreatePHI(I64Type, 1, "mismatch_vec_found_index");
+ VectorFoundIndex->addIncoming(VectorIndexPhi, VectorLoopStartBlock);
Value *PredMatchCmp = Builder.CreateAnd(LastLoopPred, FoundPred);
Value *Ctz = Builder.CreateIntrinsic(
Intrinsic::experimental_cttz_elts, {ResType, PredMatchCmp->getType()},
{PredMatchCmp, /*ZeroIsPoison=*/Builder.getInt1(true)});
Ctz = Builder.CreateZExt(Ctz, I64Type);
- Value *SVELoopRes64 = Builder.CreateAdd(SVEFoundIndex, Ctz, "",
- /*HasNUW=*/true, /*HasNSW=*/true);
- Value *SVELoopRes = Builder.CreateTrunc(SVELoopRes64, ResType);
+ Value *VectorLoopRes64 = Builder.CreateAdd(VectorFoundIndex, Ctz, "",
+ /*HasNUW=*/true, /*HasNSW=*/true);
+ Value *VectorLoopRes = Builder.CreateTrunc(VectorLoopRes64, ResType);
Builder.Insert(BranchInst::Create(EndBlock));
- DTU.applyUpdates({{DominatorTree::Insert, SVELoopMismatchBlock, EndBlock}});
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, VectorLoopMismatchBlock, EndBlock}});
// Generate code for scalar loop.
Builder.SetInsertPoint(LoopPreHeaderBlock);
@@ -701,22 +643,22 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
// 1. We didn't find a mismatch in the scalar loop, so we return MaxLen.
// 2. We exited the scalar loop early due to a mismatch and need to return
// the index that we found.
- // 3. We didn't find a mismatch in the SVE loop, so we return MaxLen.
- // 4. We exitted the SVE loop early due to a mismatch and need to return
+ // 3. We didn't find a mismatch in the vector loop, so we return MaxLen.
+ // 4. We exited the vector loop early due to a mismatch and need to return
// the index that we found.
Builder.SetInsertPoint(EndBlock, EndBlock->getFirstInsertionPt());
PHINode *ResPhi = Builder.CreatePHI(ResType, 4, "mismatch_result");
ResPhi->addIncoming(MaxLen, LoopIncBlock);
ResPhi->addIncoming(IndexPhi, LoopStartBlock);
- ResPhi->addIncoming(MaxLen, SVELoopIncBlock);
- ResPhi->addIncoming(SVELoopRes, SVELoopMismatchBlock);
+ ResPhi->addIncoming(MaxLen, VectorLoopIncBlock);
+ ResPhi->addIncoming(VectorLoopRes, VectorLoopMismatchBlock);
Value *FinalRes = Builder.CreateTrunc(ResPhi, ResType);
if (VerifyLoops) {
ScalarLoop->verifyLoop();
- SVELoop->verifyLoop();
- if (!SVELoop->isRecursivelyLCSSAForm(*DT, *LI))
+ VectorLoop->verifyLoop();
+ if (!VectorLoop->isRecursivelyLCSSAForm(*DT, *LI))
report_fatal_error("Loops must remain in LCSSA form!");
if (!ScalarLoop->isRecursivelyLCSSAForm(*DT, *LI))
report_fatal_error("Loops must remain in LCSSA form!");
@@ -725,10 +667,12 @@ Value *AArch64LoopIdiomTransform::expandFindMismatch(
return FinalRes;
}
-void AArch64LoopIdiomTransform::transformByteCompare(
- GetElementPtrInst *GEPA, GetElementPtrInst *GEPB, PHINode *IndPhi,
- Value *MaxLen, Instruction *Index, Value *Start, bool IncIdx,
- BasicBlock *FoundBB, BasicBlock *EndBB) {
+void LoopIdiomVectorize::transformByteCompare(GetElementPtrInst *GEPA,
+ GetElementPtrInst *GEPB,
+ PHINode *IndPhi, Value *MaxLen,
+ Instruction *Index, Value *Start,
+ bool IncIdx, BasicBlock *FoundBB,
+ BasicBlock *EndBB) {
// Insert the byte compare code at the end of the preheader block
BasicBlock *Preheader = CurLoop->getLoopPreheader();
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
index 27ab11446b571..39037761c81bb 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
-; RUN: opt -aarch64-lit -simplifycfg -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
-; RUN: opt -aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
-; RUN: opt -p aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
-; RUN: opt -passes='function(loop(aarch64-lit)),simplifycfg' -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
-; RUN: opt -p aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
+; RUN: opt -p loop-idiom-vectorize -loop-idiom-vectorize-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
+; RUN: opt -passes='function(loop(loop-idiom-vectorize)),simplifycfg' -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
+; RUN: opt -p loop-idiom-vectorize -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; CHECK-LABEL: define i32 @compare_bytes_simple(
@@ -33,36 +30,36 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: mismatch_sve_loop_preheader:
+; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: mismatch_vec_loop_preheader:
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; CHECK-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; CHECK: mismatch_sve_loop:
-; CHECK-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; CHECK: mismatch_vec_loop:
+; CHECK-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; CHECK: mismatch_sve_loop_inc:
-; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; CHECK: mismatch_vec_loop_inc:
+; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; CHECK-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
-; CHECK: mismatch_sve_loop_found:
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK: mismatch_vec_loop_found:
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; CHECK-NEXT: br label [[MISMATCH_END]]
; CHECK: mismatch_loop_pre:
@@ -81,7 +78,7 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; CHECK-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
; CHECK: mismatch_end:
-; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
@@ -128,36 +125,36 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
-; LOOP-DEL: mismatch_sve_loop_preheader:
+; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
+; LOOP-DEL: mismatch_vec_loop_preheader:
; LOOP-DEL-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; LOOP-DEL-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; LOOP-DEL-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; LOOP-DEL: mismatch_sve_loop:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; LOOP-DEL: mismatch_vec_loop:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; LOOP-DEL-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; LOOP-DEL-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; LOOP-DEL: mismatch_sve_loop_inc:
-; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; LOOP-DEL: mismatch_vec_loop_inc:
+; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; LOOP-DEL-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[WHILE_END:%.*]]
-; LOOP-DEL: mismatch_sve_loop_found:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[WHILE_END:%.*]]
+; LOOP-DEL: mismatch_vec_loop_found:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; LOOP-DEL-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; LOOP-DEL-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; LOOP-DEL-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; LOOP-DEL-NEXT: br label [[WHILE_END]]
; LOOP-DEL: mismatch_loop_pre:
@@ -176,7 +173,7 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; LOOP-DEL-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; LOOP-DEL-NEXT: br i1 [[TMP44]], label [[WHILE_END]], label [[MISMATCH_LOOP]]
; LOOP-DEL: while.end:
-; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; LOOP-DEL-NEXT: [[RES:%.*]] = add i32 [[MISMATCH_RESULT]], [[EXTRA]]
; LOOP-DEL-NEXT: ret i32 [[RES]]
;
@@ -256,36 +253,36 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) {
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; CHECK: mismatch_sve_loop_preheader:
+; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK: mismatch_vec_loop_preheader:
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; CHECK-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; CHECK: mismatch_sve_loop:
-; CHECK-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; CHECK: mismatch_vec_loop:
+; CHECK-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; CHECK: mismatch_sve_loop_inc:
-; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; CHECK: mismatch_vec_loop_inc:
+; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; CHECK-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
-; CHECK: mismatch_sve_loop_found:
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK: mismatch_vec_loop_found:
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; CHECK-NEXT: br label [[MISMATCH_END]]
; CHECK: mismatch_loop_pre:
@@ -304,7 +301,7 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) {
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; CHECK-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
; CHECK: mismatch_end:
-; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
@@ -349,36 +346,36 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) {
; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; LOOP-DEL: mismatch_sve_loop_preheader:
+; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; LOOP-DEL: mismatch_vec_loop_preheader:
; LOOP-DEL-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; LOOP-DEL-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; LOOP-DEL-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; LOOP-DEL: mismatch_sve_loop:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; LOOP-DEL: mismatch_vec_loop:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; LOOP-DEL-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; LOOP-DEL-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; LOOP-DEL: mismatch_sve_loop_inc:
-; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; LOOP-DEL: mismatch_vec_loop_inc:
+; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; LOOP-DEL-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[WHILE_END:%.*]]
-; LOOP-DEL: mismatch_sve_loop_found:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[WHILE_END:%.*]]
+; LOOP-DEL: mismatch_vec_loop_found:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; LOOP-DEL-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; LOOP-DEL-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; LOOP-DEL-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; LOOP-DEL-NEXT: br label [[WHILE_END]]
; LOOP-DEL: mismatch_loop_pre:
@@ -397,7 +394,7 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) {
; LOOP-DEL-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; LOOP-DEL-NEXT: br i1 [[TMP44]], label [[WHILE_END]], label [[MISMATCH_LOOP]]
; LOOP-DEL: while.end:
-; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; LOOP-DEL-NEXT: ret i32 [[MISMATCH_RESULT]]
;
; NO-TRANSFORM-LABEL: define i32 @compare_bytes_signed_wrap(
@@ -472,36 +469,36 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; CHECK: mismatch_sve_loop_preheader:
+; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK: mismatch_vec_loop_preheader:
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; CHECK-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; CHECK: mismatch_sve_loop:
-; CHECK-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; CHECK: mismatch_vec_loop:
+; CHECK-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; CHECK: mismatch_sve_loop_inc:
-; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; CHECK: mismatch_vec_loop_inc:
+; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; CHECK-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
-; CHECK: mismatch_sve_loop_found:
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK: mismatch_vec_loop_found:
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; CHECK-NEXT: br label [[MISMATCH_END]]
; CHECK: mismatch_loop_pre:
@@ -520,7 +517,7 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; CHECK-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
; CHECK: mismatch_end:
-; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
@@ -576,36 +573,36 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3
; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; LOOP-DEL: mismatch_sve_loop_preheader:
+; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; LOOP-DEL: mismatch_vec_loop_preheader:
; LOOP-DEL-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; LOOP-DEL-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; LOOP-DEL-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; LOOP-DEL: mismatch_sve_loop:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; LOOP-DEL: mismatch_vec_loop:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; LOOP-DEL-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; LOOP-DEL-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; LOOP-DEL: mismatch_sve_loop_inc:
-; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; LOOP-DEL: mismatch_vec_loop_inc:
+; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; LOOP-DEL-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[BYTE_COMPARE:%.*]]
-; LOOP-DEL: mismatch_sve_loop_found:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[BYTE_COMPARE:%.*]]
+; LOOP-DEL: mismatch_vec_loop_found:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; LOOP-DEL-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; LOOP-DEL-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; LOOP-DEL-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; LOOP-DEL-NEXT: br label [[BYTE_COMPARE]]
; LOOP-DEL: mismatch_loop_pre:
@@ -624,7 +621,7 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3
; LOOP-DEL-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; LOOP-DEL-NEXT: br i1 [[TMP44]], label [[BYTE_COMPARE]], label [[MISMATCH_LOOP]]
; LOOP-DEL: byte.compare:
-; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; LOOP-DEL-NEXT: [[TMP45:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
; LOOP-DEL-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP45]], i32 [[N]], i32 [[MISMATCH_RESULT]]
; LOOP-DEL-NEXT: [[SPEC_SELECT4:%.*]] = select i1 [[TMP45]], ptr [[D]], ptr [[C]]
@@ -729,36 +726,36 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; CHECK: mismatch_sve_loop_preheader:
+; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK: mismatch_vec_loop_preheader:
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; CHECK-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; CHECK: mismatch_sve_loop:
-; CHECK-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; CHECK: mismatch_vec_loop:
+; CHECK-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; CHECK: mismatch_sve_loop_inc:
-; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; CHECK: mismatch_vec_loop_inc:
+; CHECK-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; CHECK-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
-; CHECK: mismatch_sve_loop_found:
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK: mismatch_vec_loop_found:
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; CHECK-NEXT: br label [[MISMATCH_END]]
; CHECK: mismatch_loop_pre:
@@ -777,7 +774,7 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; CHECK-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
; CHECK: mismatch_end:
-; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
@@ -828,36 +825,36 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; LOOP-DEL: mismatch_sve_loop_preheader:
+; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; LOOP-DEL: mismatch_vec_loop_preheader:
; LOOP-DEL-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; LOOP-DEL-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
-; LOOP-DEL-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; LOOP-DEL: mismatch_sve_loop:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
-; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; LOOP-DEL: mismatch_vec_loop:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; LOOP-DEL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
+; LOOP-DEL-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; LOOP-DEL-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
-; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
; LOOP-DEL-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
-; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; LOOP-DEL: mismatch_sve_loop_inc:
-; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; LOOP-DEL: mismatch_vec_loop_inc:
+; LOOP-DEL-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
; LOOP-DEL-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
; LOOP-DEL-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
-; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[WHILE_END]]
-; LOOP-DEL: mismatch_sve_loop_found:
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[WHILE_END]]
+; LOOP-DEL: mismatch_vec_loop_found:
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; LOOP-DEL-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; LOOP-DEL-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; LOOP-DEL-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
; LOOP-DEL-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
; LOOP-DEL-NEXT: br label [[WHILE_END]]
; LOOP-DEL: mismatch_loop_pre:
@@ -876,7 +873,7 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; LOOP-DEL-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
; LOOP-DEL-NEXT: br i1 [[TMP44]], label [[WHILE_END]], label [[MISMATCH_LOOP]]
; LOOP-DEL: while.end:
-; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; LOOP-DEL-NEXT: ret i32 [[INC_LCSSA]]
;
; NO-TRANSFORM-LABEL: define i32 @compare_bytes_extra_cmp(
@@ -960,36 +957,36 @@ define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) {
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
-; CHECK: mismatch_sve_loop_preheader:
+; CHECK-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK: mismatch_vec_loop_preheader:
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 1, i64 0)
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP16]], 16
-; CHECK-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
-; CHECK: mismatch_sve_loop:
-; CHECK-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP15]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP26:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP25:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_SVE_INDEX]]
-; CHECK-NEXT: [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]]
+; CHECK: mismatch_vec_loop:
+; CHECK-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP15]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP26:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP25:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VEC_INDEX]]
+; CHECK-NEXT: [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
; CHECK-NEXT: [[TMP22:%.*]] = icmp ne <vscale x 16 x i8> [[TMP19]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP22]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP22]], <vscale x 16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP23]])
-; CHECK-NEXT: br i1 [[TMP24]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
-; CHECK: mismatch_sve_loop_inc:
-; CHECK-NEXT: [[TMP25]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP17]]
+; CHECK-NEXT: br i1 [[TMP24]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
+; CHECK: mismatch_vec_loop_inc:
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP17]]
; CHECK-NEXT: [[TMP26]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP25]], i64 0)
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i64 0
-; CHECK-NEXT: br i1 [[TMP27]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
-; CHECK: mismatch_sve_loop_found:
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP23]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
-; CHECK-NEXT: [[TMP28:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NEXT: br i1 [[TMP27]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK: mismatch_vec_loop_found:
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP23]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
+; CHECK-NEXT: [[TMP28:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP28]], i1 true)
; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64
-; CHECK-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP30]]
+; CHECK-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
; CHECK-NEXT: br label [[MISMATCH_END]]
; CHECK: mismatch_loop_pre:
@@ -1008,7 +1005,7 @@ define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) {
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 0
; CHECK-NEXT: br i1 [[TMP40]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
; CHECK: mismatch_end:
-; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP32]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP32]], [[MISMATCH_VEC_LOOP_FOUND]] ]
; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ]
More information about the llvm-commits mailing list