[llvm] [AArch64] Add AArch64PassRegistry.def (PR #85215)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 18 03:00:19 PDT 2024
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/85215
>From bde44fb80bdc91cc2637700d937ec9146920a755 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Thu, 14 Mar 2024 20:38:04 +0800
Subject: [PATCH 1/2] [AArch64] Add AArch64PassRegistry.def
---
.../llvm/Passes/TargetPassRegistry.inc | 304 +++
.../Target/AArch64/AArch64PassRegistry.def | 20 +
.../Target/AArch64/AArch64TargetMachine.cpp | 4 +
.../LoopIdiom/AArch64/byte-compare-index.ll | 1735 +++++++++++++++++
4 files changed, 2063 insertions(+)
create mode 100644 llvm/include/llvm/Passes/TargetPassRegistry.inc
create mode 100644 llvm/lib/Target/AArch64/AArch64PassRegistry.def
diff --git a/llvm/include/llvm/Passes/TargetPassRegistry.inc b/llvm/include/llvm/Passes/TargetPassRegistry.inc
new file mode 100644
index 00000000000000..7ae04802772730
--- /dev/null
+++ b/llvm/include/llvm/Passes/TargetPassRegistry.inc
@@ -0,0 +1,304 @@
+//===- TargetPassRegistry.inc - Registry of passes --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes in registerPassBuilderCallbacks
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifdef GET_PASS_REGISTRY
+
+#if !__has_include(GET_PASS_REGISTRY)
+#error "must provide <Target>PassRegistry.def"
+#endif
+
+static constexpr bool HAVE_MODULE_ANALYSIS_ = std::size({
+ "",
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef MODULE_ANALYSIS
+ }) > 1;
+
+static constexpr bool HAVE_MODULE_PASS_ = std::size({
+ "",
+#define MODULE_PASS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef MODULE_PASS
+ }) > 1;
+
+static constexpr bool HAVE_MODULE_PASS_WITH_PARAMS_ = std::size({
+ "",
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) NAME,
+#include GET_PASS_REGISTRY
+#undef MODULE_PASS_WITH_PARAMS
+ }) > 1;
+
+static constexpr bool HAVE_FUNCTION_ANALYSIS_ = std::size({
+ "",
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef FUNCTION_ANALYSIS
+ }) > 1;
+
+static constexpr bool HAVE_FUNCTION_ALIAS_ANALYSIS_ = std::size({
+ "",
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef FUNCTION_ALIAS_ANALYSIS
+ }) > 1;
+
+static constexpr bool HAVE_FUNCTION_PASS_ = std::size({
+ "",
+#define FUNCTION_PASS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef FUNCTION_PASS
+ }) > 1;
+
+static constexpr bool HAVE_FUNCTION_PASS_WITH_PARAMS_ = std::size({
+ "",
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ NAME,
+#include GET_PASS_REGISTRY
+#undef FUNCTION_PASS_WITH_PARAMS
+ }) > 1;
+
+static constexpr bool HAVE_LOOP_ANALYSIS_ = std::size({
+ "",
+#define LOOP_ANALYSIS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef LOOP_ANALYSIS
+ }) > 1;
+
+static constexpr bool HAVE_LOOP_PASS_ = std::size({
+ "",
+#define LOOP_PASS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef LOOP_PASS
+ }) > 1;
+
+static constexpr bool HAVE_MACHINE_FUNCTION_ANALYSIS_ = std::size({
+ "",
+#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef MACHINE_FUNCTION_ANALYSIS
+ }) > 1;
+
+static constexpr bool HAVE_MACHINE_FUNCTION_PASS_ = std::size({
+ "",
+#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) NAME,
+#include GET_PASS_REGISTRY
+#undef MACHINE_FUNCTION_PASS
+ }) > 1;
+
+static constexpr bool HAVE_MACHINE_FUNCTION_PASS_WITH_PARAMS_ = std::size({
+ "",
+#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
+ PARAMS) \
+ NAME,
+#include GET_PASS_REGISTRY
+#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
+ }) > 1;
+
+// When requested, hand the instrumentation callbacks a class-name ->
+// pipeline-name entry for every pass listed in the target registry.
+if (PopulateClassToPassNames) {
+ auto *PIC = PB.getPassInstrumentationCallbacks();
+
+// Plain entries: the class name comes from the type of the CREATE_PASS
+// expression.
+#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+// Parameterized entries: the registry supplies the class name as CLASS.
+#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS) \
+ PIC->addClassToPassName(CLASS, NAME);
+
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define MODULE_PASS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
+#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define LOOP_PASS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) \
+ ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
+#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
+ PARAMS) \
+ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
+#include GET_PASS_REGISTRY
+// Every macro defined above is removed again so none of them outlives the
+// helper macros it expands to.
+#undef MODULE_ANALYSIS
+#undef MODULE_PASS
+#undef MODULE_PASS_WITH_PARAMS
+#undef FUNCTION_ANALYSIS
+#undef FUNCTION_ALIAS_ANALYSIS
+#undef FUNCTION_PASS
+#undef FUNCTION_PASS_WITH_PARAMS
+#undef LOOP_ANALYSIS
+#undef LOOP_PASS
+#undef MACHINE_FUNCTION_ANALYSIS
+#undef MACHINE_FUNCTION_PASS
+#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
+#undef ADD_CLASS_PASS_TO_PASS_NAME
+#undef ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS
+}
+
+#define ADD_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ PM.addPass(CREATE_PASS); \
+ return true; \
+ }
+
+#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (PassBuilder::checkParametrizedPassName(Name, NAME)) { \
+ auto Params = PassBuilder::parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return false; \
+ PM.addPass(CREATE_PASS(Params.get())); \
+ return true; \
+ }
+
+if constexpr (HAVE_MODULE_PASS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define MODULE_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
+#include GET_PASS_REGISTRY
+#undef MODULE_PASS
+ return false;
+ });
+}
+
+if constexpr (HAVE_MODULE_PASS_WITH_PARAMS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#include GET_PASS_REGISTRY
+#undef MODULE_PASS_WITH_PARAMS
+ return false;
+ });
+}
+
+if constexpr (HAVE_FUNCTION_PASS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, FunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
+#include GET_PASS_REGISTRY
+#undef FUNCTION_PASS
+ return false;
+ });
+}
+
+if constexpr (HAVE_FUNCTION_PASS_WITH_PARAMS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, FunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#include GET_PASS_REGISTRY
+#undef FUNCTION_PASS_WITH_PARAMS
+ return false;
+ });
+}
+
+// Pipeline-name parser for the registry's LOOP_PASS entries: adds the matching
+// pass to the LoopPassManager, or reports the name as unhandled.
+if constexpr (HAVE_LOOP_PASS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, LoopPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define LOOP_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
+#include GET_PASS_REGISTRY
+#undef LOOP_PASS
+ return false;
+ });
+}
+
+// Pipeline-name parser for the registry's MACHINE_FUNCTION_PASS entries: adds
+// the matching pass to the MachineFunctionPassManager, or reports the name as
+// unhandled.
+if constexpr (HAVE_MACHINE_FUNCTION_PASS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, MachineFunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
+#include GET_PASS_REGISTRY
+#undef MACHINE_FUNCTION_PASS
+ return false;
+ });
+}
+
+// Pipeline-name parser for parameterized machine-function passes. It takes a
+// MachineFunctionPassManager, matching the non-parameterized
+// MACHINE_FUNCTION_PASS callback, so these passes are not added to an IR
+// FunctionPassManager.
+if constexpr (HAVE_MACHINE_FUNCTION_PASS_WITH_PARAMS_) {
+ PB.registerPipelineParsingCallback(
+ [=](StringRef Name, MachineFunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
+ PARAMS) \
+ ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#include GET_PASS_REGISTRY
+#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
+ return false;
+ });
+}
+#undef ADD_PASS
+#undef ADD_PASS_WITH_PARAMS
+
+if constexpr (HAVE_MODULE_ANALYSIS_) {
+ PB.registerAnalysisRegistrationCallback([](ModuleAnalysisManager &AM) {
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ AM.registerPass([&] { return CREATE_PASS; });
+#include GET_PASS_REGISTRY
+#undef MODULE_ANALYSIS
+ });
+}
+
+if constexpr (HAVE_FUNCTION_ANALYSIS_) {
+ PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &AM) {
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ AM.registerPass([&] { return CREATE_PASS; });
+#include GET_PASS_REGISTRY
+#undef FUNCTION_ANALYSIS
+ });
+}
+
+// Alias-analysis name parser: for each FUNCTION_ALIAS_ANALYSIS entry, a
+// matching Name registers the entry's analysis type with the AAManager.
+if constexpr (HAVE_FUNCTION_ALIAS_ANALYSIS_) {
+ PB.registerParseAACallback([](StringRef Name, AAManager &AM) {
+// Drop any definition left over from an earlier expansion before redefining.
+#undef FUNCTION_ALIAS_ANALYSIS
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ AM.registerFunctionAnalysis< \
+ std::remove_reference_t<decltype(CREATE_PASS)>>(); \
+ return true; \
+ }
+#include GET_PASS_REGISTRY
+#undef FUNCTION_ALIAS_ANALYSIS
+ return false;
+ });
+}
+
+if constexpr (HAVE_LOOP_ANALYSIS_) {
+ PB.registerAnalysisRegistrationCallback([](LoopAnalysisManager &AM) {
+#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
+ AM.registerPass([&] { return CREATE_PASS; });
+#include GET_PASS_REGISTRY
+#undef LOOP_ANALYSIS
+ });
+}
+
+if constexpr (HAVE_MACHINE_FUNCTION_ANALYSIS_) {
+ PB.registerAnalysisRegistrationCallback(
+ [](MachineFunctionAnalysisManager &AM) {
+#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ AM.registerPass([&] { return CREATE_PASS; });
+#include GET_PASS_REGISTRY
+#undef MACHINE_FUNCTION_ANALYSIS
+ });
+}
+#undef GET_PASS_REGISTRY
+#endif // GET_PASS_REGISTRY
diff --git a/llvm/lib/Target/AArch64/AArch64PassRegistry.def b/llvm/lib/Target/AArch64/AArch64PassRegistry.def
new file mode 100644
index 00000000000000..ca944579f93a93
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PassRegistry.def
@@ -0,0 +1,20 @@
+//===- AArch64PassRegistry.def - Registry of AArch64 passes -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the
+// AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef LOOP_PASS
+#define LOOP_PASS(NAME, CREATE_PASS)
+#endif
+LOOP_PASS("aarch64-lit", AArch64LoopIdiomTransformPass())
+#undef LOOP_PASS
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index e5e60459e8148a..08238fdf167bf2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -547,6 +547,10 @@ class AArch64PassConfig : public TargetPassConfig {
void AArch64TargetMachine::registerPassBuilderCallbacks(
PassBuilder &PB, bool PopulateClassToPassNames) {
+
+#define GET_PASS_REGISTRY "AArch64PassRegistry.def"
+#include "llvm/Passes/TargetPassRegistry.inc"
+
PB.registerLateLoopOptimizationsEPCallback(
[=](LoopPassManager &LPM, OptimizationLevel Level) {
LPM.addPass(AArch64LoopIdiomTransformPass());
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
index e6a0c5f45375fd..b484a04a10cbba 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
@@ -2,6 +2,9 @@
; RUN: opt -aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
; RUN: opt -aarch64-lit -simplifycfg -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
; RUN: opt -aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
+; RUN: opt -p aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-NPM
+; RUN: opt -p 'simplifycfg,loop(aarch64-lit)' -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL-NPM
+; RUN: opt -p aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM-NPM
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; CHECK-LABEL: define i32 @compare_bytes_simple(
@@ -200,6 +203,223 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; NO-TRANSFORM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
; NO-TRANSFORM-NEXT: ret i32 [[RES]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; CHECK-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; CHECK-NPM: mismatch_min_it_check:
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; CHECK-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NPM: mismatch_mem_check:
+; CHECK-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; CHECK-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; CHECK-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; CHECK-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; CHECK-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; CHECK-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; CHECK-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; CHECK-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NPM: mismatch_sve_loop_preheader:
+; CHECK-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; CHECK-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; CHECK-NPM: mismatch_sve_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; CHECK-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; CHECK-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; CHECK-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; CHECK-NPM: mismatch_sve_loop_inc:
+; CHECK-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; CHECK-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK-NPM: mismatch_sve_loop_found:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; CHECK-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; CHECK-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; CHECK-NPM-NEXT: br label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_pre:
+; CHECK-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; CHECK-NPM: mismatch_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; CHECK-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; CHECK-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; CHECK-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; CHECK-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_inc:
+; CHECK-NPM-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1
+; CHECK-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; CHECK-NPM: mismatch_end:
+; CHECK-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: byte.compare:
+; CHECK-NPM-NEXT: br label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ], [ [[EXTRA]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; CHECK-NPM-NEXT: ret i32 [[RES]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; LOOP-DEL-NPM: mismatch_min_it_check:
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
+; LOOP-DEL-NPM: mismatch_mem_check:
+; LOOP-DEL-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; LOOP-DEL-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; LOOP-DEL-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; LOOP-DEL-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; LOOP-DEL-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; LOOP-DEL-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; LOOP-DEL-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
+; LOOP-DEL-NPM: mismatch_sve_loop_preheader:
+; LOOP-DEL-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; LOOP-DEL-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; LOOP-DEL-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; LOOP-DEL-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; LOOP-DEL-NPM: mismatch_sve_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; LOOP-DEL-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop_found:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; LOOP-DEL-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_pre:
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; LOOP-DEL-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; LOOP-DEL-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1
+; LOOP-DEL-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; LOOP-DEL-NPM: mismatch_end:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: byte.compare:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; LOOP-DEL-NPM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ], [ [[EXTRA]], [[BYTE_COMPARE]] ]
+; LOOP-DEL-NPM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; LOOP-DEL-NPM-NEXT: ret i32 [[RES]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[RES]]
+;
entry:
br label %while.cond
@@ -418,6 +638,217 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_signed_wrap(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; CHECK-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; CHECK-NPM: mismatch_min_it_check:
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; CHECK-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; CHECK-NPM: mismatch_mem_check:
+; CHECK-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; CHECK-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; CHECK-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; CHECK-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; CHECK-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; CHECK-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; CHECK-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; CHECK-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK-NPM: mismatch_sve_loop_preheader:
+; CHECK-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; CHECK-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; CHECK-NPM: mismatch_sve_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; CHECK-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; CHECK-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; CHECK-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; CHECK-NPM: mismatch_sve_loop_inc:
+; CHECK-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; CHECK-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK-NPM: mismatch_sve_loop_found:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; CHECK-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; CHECK-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; CHECK-NPM-NEXT: br label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_pre:
+; CHECK-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; CHECK-NPM: mismatch_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; CHECK-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; CHECK-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; CHECK-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; CHECK-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_inc:
+; CHECK-NPM-NEXT: [[TMP43]] = add nsw i32 [[MISMATCH_INDEX]], 1
+; CHECK-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; CHECK-NPM: mismatch_end:
+; CHECK-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: byte.compare:
+; CHECK-NPM-NEXT: br label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_signed_wrap(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; LOOP-DEL-NPM: mismatch_min_it_check:
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; LOOP-DEL-NPM: mismatch_mem_check:
+; LOOP-DEL-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; LOOP-DEL-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; LOOP-DEL-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; LOOP-DEL-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; LOOP-DEL-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; LOOP-DEL-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; LOOP-DEL-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; LOOP-DEL-NPM: mismatch_sve_loop_preheader:
+; LOOP-DEL-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; LOOP-DEL-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; LOOP-DEL-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; LOOP-DEL-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; LOOP-DEL-NPM: mismatch_sve_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; LOOP-DEL-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop_found:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; LOOP-DEL-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_pre:
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; LOOP-DEL-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; LOOP-DEL-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP43]] = add nsw i32 [[MISMATCH_INDEX]], 1
+; LOOP-DEL-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; LOOP-DEL-NPM: mismatch_end:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: byte.compare:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_signed_wrap(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add nsw i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -659,6 +1090,166 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3
; NO-TRANSFORM-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
; NO-TRANSFORM-NEXT: ret i32 [[MISMATCH_INDEX]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple_end_ne_found(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; CHECK-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; CHECK-NPM: mismatch_min_it_check:
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; CHECK-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; CHECK-NPM: mismatch_mem_check:
+; CHECK-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; CHECK-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; CHECK-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; CHECK-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; CHECK-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; CHECK-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; CHECK-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; CHECK-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK-NPM: mismatch_sve_loop_preheader:
+; CHECK-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; CHECK-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; CHECK-NPM: mismatch_sve_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; CHECK-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; CHECK-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; CHECK-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; CHECK-NPM: mismatch_sve_loop_inc:
+; CHECK-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; CHECK-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK-NPM: mismatch_sve_loop_found:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; CHECK-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; CHECK-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; CHECK-NPM-NEXT: br label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_pre:
+; CHECK-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; CHECK-NPM: mismatch_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64
+; CHECK-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; CHECK-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; CHECK-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; CHECK-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_inc:
+; CHECK-NPM-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX3]], 1
+; CHECK-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; CHECK-NPM: mismatch_end:
+; CHECK-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]]
+; CHECK-NPM: while.found:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: br label [[END:%.*]]
+; CHECK-NPM: byte.compare:
+; CHECK-NPM-NEXT: [[TMP47:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP47]], label [[WHILE_END]], label [[WHILE_FOUND]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: br label [[END]]
+; CHECK-NPM: end:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ]
+; CHECK-NPM-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ]
+; CHECK-NPM-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
+; CHECK-NPM-NEXT: ret i32 [[MISMATCH_INDEX]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple_end_ne_found(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[END]]
+; LOOP-DEL-NPM: end:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
+; LOOP-DEL-NPM-NEXT: ret i32 [[MISMATCH_INDEX]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple_end_ne_found(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]]
+; NO-TRANSFORM-NPM: while.found:
+; NO-TRANSFORM-NPM-NEXT: [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; NO-TRANSFORM-NPM-NEXT: [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ]
+; NO-TRANSFORM-NPM-NEXT: br label [[END:%.*]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: br label [[END]]
+; NO-TRANSFORM-NPM: end:
+; NO-TRANSFORM-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ]
+; NO-TRANSFORM-NPM-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ]
+; NO-TRANSFORM-NPM-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[MISMATCH_INDEX]]
+;
entry:
br label %while.cond
@@ -897,6 +1488,235 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_extra_cmp(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
+; CHECK-NPM-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
+; CHECK-NPM: ph:
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; CHECK-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; CHECK-NPM: mismatch_min_it_check:
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; CHECK-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; CHECK-NPM: mismatch_mem_check:
+; CHECK-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; CHECK-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; CHECK-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; CHECK-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; CHECK-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; CHECK-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; CHECK-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; CHECK-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK-NPM: mismatch_sve_loop_preheader:
+; CHECK-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; CHECK-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; CHECK-NPM: mismatch_sve_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; CHECK-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; CHECK-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; CHECK-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; CHECK-NPM: mismatch_sve_loop_inc:
+; CHECK-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; CHECK-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; CHECK-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; CHECK-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK-NPM: mismatch_sve_loop_found:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; CHECK-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; CHECK-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; CHECK-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; CHECK-NPM-NEXT: br label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_pre:
+; CHECK-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; CHECK-NPM: mismatch_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; CHECK-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; CHECK-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; CHECK-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; CHECK-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; CHECK-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_inc:
+; CHECK-NPM-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1
+; CHECK-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; CHECK-NPM: mismatch_end:
+; CHECK-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
+; CHECK-NPM: byte.compare:
+; CHECK-NPM-NEXT: br label [[WHILE_END_LOOPEXIT]]
+; CHECK-NPM: while.end.loopexit:
+; CHECK-NPM-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: br label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_extra_cmp(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_X]], label [[WHILE_COND_PREHEADER:%.*]], label [[WHILE_END:%.*]]
+; LOOP-DEL-NPM: while.cond.preheader:
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; LOOP-DEL-NPM: mismatch_min_it_check:
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; LOOP-DEL-NPM: mismatch_mem_check:
+; LOOP-DEL-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
+; LOOP-DEL-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; LOOP-DEL-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; LOOP-DEL-NPM-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; LOOP-DEL-NPM-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
+; LOOP-DEL-NPM-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; LOOP-DEL-NPM-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; LOOP-DEL-NPM: mismatch_sve_loop_preheader:
+; LOOP-DEL-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
+; LOOP-DEL-NPM-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; LOOP-DEL-NPM-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
+; LOOP-DEL-NPM-NEXT: [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NPM-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP28]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; LOOP-DEL-NPM: mismatch_sve_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP21]]
+; LOOP-DEL-NPM-NEXT: [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
+; LOOP-DEL-NPM-NEXT: [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP31]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop_found:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NPM-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; LOOP-DEL-NPM-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP34]]
+; LOOP-DEL-NPM-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_pre:
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
+; LOOP-DEL-NPM-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
+; LOOP-DEL-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1
+; LOOP-DEL-NPM-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; LOOP-DEL-NPM: mismatch_end:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ [[LEN]], [[MISMATCH_END]] ]
+; LOOP-DEL-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
+; LOOP-DEL-NPM: byte.compare:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_END_LOOPEXIT]]
+; LOOP-DEL-NPM: while.end.loopexit:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_extra_cmp(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
+; NO-TRANSFORM-NPM: ph:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
+; NO-TRANSFORM-NPM: while.end.loopexit:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ], [ [[INC]], [[WHILE_BODY]] ]
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
%cmp.x = icmp ult i32 %n, %x
br i1 %cmp.x, label %ph, label %while.end
@@ -1061,6 +1881,213 @@ define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) {
; NO-TRANSFORM-NEXT: [[RES:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
; NO-TRANSFORM-NEXT: ret void
;
+; CHECK-NPM-LABEL: define void @compare_bytes_cleanup_block(
+; CHECK-NPM-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; CHECK-NPM: mismatch_min_it_check:
+; CHECK-NPM-NEXT: br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; CHECK-NPM: mismatch_mem_check:
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1
+; CHECK-NPM-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NPM-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0
+; CHECK-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0
+; CHECK-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; CHECK-NPM-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP3]], 12
+; CHECK-NPM-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP6]], 12
+; CHECK-NPM-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP2]], 12
+; CHECK-NPM-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP7]], 12
+; CHECK-NPM-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP9]]
+; CHECK-NPM-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; CHECK-NPM-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
+; CHECK-NPM-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; CHECK-NPM: mismatch_sve_loop_preheader:
+; CHECK-NPM-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 1, i64 0)
+; CHECK-NPM-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NPM-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP16]], 16
+; CHECK-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; CHECK-NPM: mismatch_sve_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP15]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP26:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP25:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; CHECK-NPM-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_SVE_INDEX]]
+; CHECK-NPM-NEXT: [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NPM-NEXT: [[TMP22:%.*]] = icmp ne <vscale x 16 x i8> [[TMP19]], [[TMP21]]
+; CHECK-NPM-NEXT: [[TMP23:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP22]], <vscale x 16 x i1> zeroinitializer
+; CHECK-NPM-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP23]])
+; CHECK-NPM-NEXT: br i1 [[TMP24]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; CHECK-NPM: mismatch_sve_loop_inc:
+; CHECK-NPM-NEXT: [[TMP25]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP17]]
+; CHECK-NPM-NEXT: [[TMP26]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP25]], i64 0)
+; CHECK-NPM-NEXT: [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i64 0
+; CHECK-NPM-NEXT: br i1 [[TMP27]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; CHECK-NPM: mismatch_sve_loop_found:
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP23]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; CHECK-NPM-NEXT: [[TMP28:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; CHECK-NPM-NEXT: [[TMP29:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP28]], i1 true)
+; CHECK-NPM-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64
+; CHECK-NPM-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP30]]
+; CHECK-NPM-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
+; CHECK-NPM-NEXT: br label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_pre:
+; CHECK-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; CHECK-NPM: mismatch_loop:
+; CHECK-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP39:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; CHECK-NPM-NEXT: [[TMP33:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; CHECK-NPM-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP33]]
+; CHECK-NPM-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
+; CHECK-NPM-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP33]]
+; CHECK-NPM-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
+; CHECK-NPM-NEXT: [[TMP38:%.*]] = icmp eq i8 [[TMP35]], [[TMP37]]
+; CHECK-NPM-NEXT: br i1 [[TMP38]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; CHECK-NPM: mismatch_loop_inc:
+; CHECK-NPM-NEXT: [[TMP39]] = add i32 [[MISMATCH_INDEX]], 1
+; CHECK-NPM-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 0
+; CHECK-NPM-NEXT: br i1 [[TMP40]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; CHECK-NPM: mismatch_end:
+; CHECK-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP32]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; CHECK-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ]
+; CHECK-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]]
+; CHECK-NPM: byte.compare:
+; CHECK-NPM-NEXT: [[TMP43:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
+; CHECK-NPM-NEXT: br i1 [[TMP43]], label [[CLEANUP_THREAD]], label [[IF_END]]
+; CHECK-NPM: cleanup.thread:
+; CHECK-NPM-NEXT: ret void
+; CHECK-NPM: if.end:
+; CHECK-NPM-NEXT: [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
+; CHECK-NPM-NEXT: ret void
+;
+; LOOP-DEL-NPM-LABEL: define void @compare_bytes_cleanup_block(
+; LOOP-DEL-NPM-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; LOOP-DEL-NPM: mismatch_min_it_check:
+; LOOP-DEL-NPM-NEXT: br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
+; LOOP-DEL-NPM: mismatch_mem_check:
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1
+; LOOP-DEL-NPM-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0
+; LOOP-DEL-NPM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0
+; LOOP-DEL-NPM-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP3]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP6]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP2]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP7]], 12
+; LOOP-DEL-NPM-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP9]]
+; LOOP-DEL-NPM-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; LOOP-DEL-NPM-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_SVE_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
+; LOOP-DEL-NPM: mismatch_sve_loop_preheader:
+; LOOP-DEL-NPM-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 1, i64 0)
+; LOOP-DEL-NPM-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
+; LOOP-DEL-NPM-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP16]], 16
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_SVE_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP15]], [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP26:%.*]], [[MISMATCH_SVE_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_SVE_LOOP_PREHEADER]] ], [ [[TMP25:%.*]], [[MISMATCH_SVE_LOOP_INC]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_SVE_INDEX]]
+; LOOP-DEL-NPM-NEXT: [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
+; LOOP-DEL-NPM-NEXT: [[TMP22:%.*]] = icmp ne <vscale x 16 x i8> [[TMP19]], [[TMP21]]
+; LOOP-DEL-NPM-NEXT: [[TMP23:%.*]] = select <vscale x 16 x i1> [[MISMATCH_SVE_LOOP_PRED]], <vscale x 16 x i1> [[TMP22]], <vscale x 16 x i1> zeroinitializer
+; LOOP-DEL-NPM-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP23]])
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP24]], label [[MISMATCH_SVE_LOOP_FOUND:%.*]], label [[MISMATCH_SVE_LOOP_INC]]
+; LOOP-DEL-NPM: mismatch_sve_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP25]] = add nuw nsw i64 [[MISMATCH_SVE_INDEX]], [[TMP17]]
+; LOOP-DEL-NPM-NEXT: [[TMP26]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP25]], i64 0)
+; LOOP-DEL-NPM-NEXT: [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i64 0
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP27]], label [[MISMATCH_SVE_LOOP]], label [[MISMATCH_END:%.*]]
+; LOOP-DEL-NPM: mismatch_sve_loop_found:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP23]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_SVE_LOOP_PRED]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_SVE_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_SVE_INDEX]], [[MISMATCH_SVE_LOOP]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP28:%.*]] = and <vscale x 16 x i1> [[MISMATCH_SVE_LAST_LOOP_PRED]], [[MISMATCH_SVE_FOUND_PRED]]
+; LOOP-DEL-NPM-NEXT: [[TMP29:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP28]], i1 true)
+; LOOP-DEL-NPM-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[MISMATCH_SVE_FOUND_INDEX]], [[TMP30]]
+; LOOP-DEL-NPM-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_pre:
+; LOOP-DEL-NPM-NEXT: br label [[MISMATCH_LOOP:%.*]]
+; LOOP-DEL-NPM: mismatch_loop:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP39:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[TMP33:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
+; LOOP-DEL-NPM-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP33]]
+; LOOP-DEL-NPM-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP33]]
+; LOOP-DEL-NPM-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
+; LOOP-DEL-NPM-NEXT: [[TMP38:%.*]] = icmp eq i8 [[TMP35]], [[TMP37]]
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP38]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
+; LOOP-DEL-NPM: mismatch_loop_inc:
+; LOOP-DEL-NPM-NEXT: [[TMP39]] = add i32 [[MISMATCH_INDEX]], 1
+; LOOP-DEL-NPM-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 0
+; LOOP-DEL-NPM-NEXT: br i1 [[TMP40]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
+; LOOP-DEL-NPM: mismatch_end:
+; LOOP-DEL-NPM-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_SVE_LOOP_INC]] ], [ [[TMP32]], [[MISMATCH_SVE_LOOP_FOUND]] ]
+; LOOP-DEL-NPM-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ]
+; LOOP-DEL-NPM-NEXT: [[INC:%.*]] = add i32 [[LEN]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[COMMON_RET:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[COMMON_RET]]
+; LOOP-DEL-NPM: byte.compare:
+; LOOP-DEL-NPM-NEXT: br label [[COMMON_RET]]
+; LOOP-DEL-NPM: common.ret:
+; LOOP-DEL-NPM-NEXT: ret void
+;
+; NO-TRANSFORM-NPM-LABEL: define void @compare_bytes_cleanup_block(
+; NO-TRANSFORM-NPM-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], 0
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]]
+; NO-TRANSFORM-NPM: cleanup.thread:
+; NO-TRANSFORM-NPM-NEXT: ret void
+; NO-TRANSFORM-NPM: if.end:
+; NO-TRANSFORM-NPM-NEXT: [[RES:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; NO-TRANSFORM-NPM-NEXT: ret void
+;
entry:
br label %while.cond
@@ -1164,6 +2191,75 @@ define i32 @compare_bytes_simple2(ptr %a, ptr %b, ptr %c, ptr %d, i32 %len, i32
; NO-TRANSFORM-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple2(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple2(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple2(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1259,6 +2355,72 @@ define i32 @compare_bytes_simple3(ptr %a, ptr %b, ptr %c, i32 %d, i32 %len, i32
; NO-TRANSFORM-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
; NO-TRANSFORM-NEXT: ret i32 [[FINAL_VAL]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple3(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
+; CHECK-NPM-NEXT: ret i32 [[FINAL_VAL]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple3(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
+; LOOP-DEL-NPM-NEXT: ret i32 [[FINAL_VAL]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple3(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[FINAL_VAL]]
+;
entry:
br label %while.cond
@@ -1348,6 +2510,69 @@ define i32 @compare_bytes_sign_ext(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_sign_ext(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_sign_ext(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_sign_ext(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1439,6 +2664,72 @@ define i32 @compare_bytes_outside_uses(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[EXT_RES:%.*]] = zext i1 [[RES]] to i32
; NO-TRANSFORM-NEXT: ret i32 [[EXT_RES]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_outside_uses(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[IV]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[LEN]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[RES:%.*]] = phi i1 [ [[CMP_NOT2]], [[WHILE_BODY]] ], [ [[CMP_NOT]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: [[EXT_RES:%.*]] = zext i1 [[RES]] to i32
+; CHECK-NPM-NEXT: ret i32 [[EXT_RES]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_outside_uses(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[IV]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[LEN]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[RES:%.*]] = phi i1 [ [[CMP_NOT2]], [[WHILE_BODY]] ], [ [[CMP_NOT]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: [[EXT_RES:%.*]] = zext i1 [[RES]] to i32
+; LOOP-DEL-NPM-NEXT: ret i32 [[EXT_RES]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_outside_uses(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[IV]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[LEN]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[RES:%.*]] = phi i1 [ [[CMP_NOT2]], [[WHILE_BODY]] ], [ [[CMP_NOT]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[EXT_RES:%.*]] = zext i1 [[RES]] to i32
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[EXT_RES]]
+;
entry:
br label %while.cond
@@ -1524,6 +2815,66 @@ define i64 @compare_bytes_i64_index(ptr %a, ptr %b, i64 %len, i64 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i64 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i64 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i64 @compare_bytes_i64_index(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[LEN:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i64 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i64 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INC]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INC]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i64 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i64 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i64 @compare_bytes_i64_index(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[LEN:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i64 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i64 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INC]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INC]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i64 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i64 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i64 @compare_bytes_i64_index(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[LEN:%.*]], i64 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i64 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i64 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INC]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INC]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i64 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i64 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1610,6 +2961,69 @@ define i32 @compare_bytes_simple_wrong_icmp1(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp1(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp1(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp1(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1697,6 +3111,69 @@ define i32 @compare_bytes_simple_wrong_icmp2(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp2(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_BODY]], label [[WHILE_END:%.*]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp2(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_BODY]], label [[WHILE_END:%.*]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp2(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_BODY]], label [[WHILE_END:%.*]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1784,6 +3261,69 @@ define i32 @compare_bytes_simple_wrong_icmp3(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp3(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp ne i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp3(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp ne i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp3(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp ne i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1871,6 +3411,69 @@ define i32 @compare_bytes_simple_wrong_icmp4(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp4(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_END]], label [[WHILE_COND]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp4(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_END]], label [[WHILE_COND]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple_wrong_icmp4(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_END]], label [[WHILE_COND]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -1958,6 +3561,69 @@ define i32 @compare_bytes_bad_load_type(ptr %a, ptr %b, i32 %len, i32 %n) {
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_bad_load_type(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i16 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_bad_load_type(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i16 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_bad_load_type(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i16 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[INC_LCSSA]]
+;
entry:
br label %while.cond
@@ -2052,6 +3718,75 @@ define i32 @compare_bytes_simple_optsize(ptr %a, ptr %b, i32 %len, i32 %extra, i
; NO-TRANSFORM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
; NO-TRANSFORM-NEXT: ret i32 [[RES]]
;
+; CHECK-NPM-LABEL: define i32 @compare_bytes_simple_optsize(
+; CHECK-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NPM-NEXT: entry:
+; CHECK-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-NPM: while.cond:
+; CHECK-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK-NPM: while.body:
+; CHECK-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK-NPM: while.end:
+; CHECK-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; CHECK-NPM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; CHECK-NPM-NEXT: ret i32 [[RES]]
+;
+; LOOP-DEL-NPM-LABEL: define i32 @compare_bytes_simple_optsize(
+; LOOP-DEL-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; LOOP-DEL-NPM-NEXT: entry:
+; LOOP-DEL-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL-NPM: while.cond:
+; LOOP-DEL-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL-NPM: while.body:
+; LOOP-DEL-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL-NPM: while.end:
+; LOOP-DEL-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; LOOP-DEL-NPM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; LOOP-DEL-NPM-NEXT: ret i32 [[RES]]
+;
+; NO-TRANSFORM-NPM-LABEL: define i32 @compare_bytes_simple_optsize(
+; NO-TRANSFORM-NPM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-TRANSFORM-NPM-NEXT: entry:
+; NO-TRANSFORM-NPM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM-NPM: while.cond:
+; NO-TRANSFORM-NPM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NPM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM-NPM: while.body:
+; NO-TRANSFORM-NPM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NPM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NPM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NPM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM-NPM: while.end:
+; NO-TRANSFORM-NPM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NPM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; NO-TRANSFORM-NPM-NEXT: ret i32 [[RES]]
+;
entry:
br label %while.cond
>From 76846e5ed8661247a8f944171dd586a84f9b44b3 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 18 Mar 2024 18:00:03 +0800
Subject: [PATCH 2/2] add msvc workaround
---
.../llvm/Passes/TargetPassRegistry.inc | 103 ++++++++++++------
1 file changed, 67 insertions(+), 36 deletions(-)
diff --git a/llvm/include/llvm/Passes/TargetPassRegistry.inc b/llvm/include/llvm/Passes/TargetPassRegistry.inc
index 7ae04802772730..8f5c20f0280851 100644
--- a/llvm/include/llvm/Passes/TargetPassRegistry.inc
+++ b/llvm/include/llvm/Passes/TargetPassRegistry.inc
@@ -18,92 +18,120 @@
#error "must provide <Target>PassRegistry.def"
#endif
-static constexpr bool HAVE_MODULE_ANALYSIS_ = std::size({
- "",
+{
+
+// FIXME: use std::size once the minimum supported MSVC version is >= 19.30
+
+static constexpr bool HAVE_MODULE_ANALYSIS_ =
+ std::array{
+ "",
#define MODULE_ANALYSIS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef MODULE_ANALYSIS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_MODULE_PASS_ = std::size({
- "",
+static constexpr bool HAVE_MODULE_PASS_ =
+ std::array{
+ "",
#define MODULE_PASS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef MODULE_PASS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_MODULE_PASS_WITH_PARAMS_ = std::size({
- "",
+static constexpr bool HAVE_MODULE_PASS_WITH_PARAMS_ =
+ std::array{
+ "",
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) NAME,
#include GET_PASS_REGISTRY
#undef MODULE_PASS_WITH_PARAMS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_FUNCTION_ANALYSIS_ = std::size({
- "",
+static constexpr bool HAVE_FUNCTION_ANALYSIS_ =
+ std::array{
+ "",
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef FUNCTION_ANALYSIS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_FUNCTION_ALIAS_ANALYSIS_ = std::size({
- "",
+static constexpr bool HAVE_FUNCTION_ALIAS_ANALYSIS_ =
+ std::array{
+ "",
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef FUNCTION_ALIAS_ANALYSIS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_FUNCTION_PASS_ = std::size({
- "",
+static constexpr bool HAVE_FUNCTION_PASS_ =
+ std::array{
+ "",
#define FUNCTION_PASS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef FUNCTION_PASS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_FUNCTION_PASS_WITH_PARAMS_ = std::size({
- "",
+static constexpr bool HAVE_FUNCTION_PASS_WITH_PARAMS_ =
+ std::array{
+ "",
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
NAME,
#include GET_PASS_REGISTRY
#undef FUNCTION_PASS_WITH_PARAMS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_LOOP_ANALYSIS_ = std::size({
- "",
+static constexpr bool HAVE_LOOP_ANALYSIS_ =
+ std::array{
+ "",
#define LOOP_ANALYSIS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef LOOP_ANALYSIS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_LOOP_PASS_ = std::size({
- "",
+static constexpr bool HAVE_LOOP_PASS_ =
+ std::array{
+ "",
#define LOOP_PASS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef LOOP_PASS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_MACHINE_FUNCTION_ANALYSIS_ = std::size({
- "",
+static constexpr bool HAVE_MACHINE_FUNCTION_ANALYSIS_ =
+ std::array{
+ "",
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef MACHINE_FUNCTION_ANALYSIS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_MACHINE_FUNCTION_PASS_ = std::size({
- "",
+static constexpr bool HAVE_MACHINE_FUNCTION_PASS_ =
+ std::array{
+ "",
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) NAME,
#include GET_PASS_REGISTRY
#undef MACHINE_FUNCTION_PASS
- }) > 1;
+ }
+ .size() > 1;
-static constexpr bool HAVE_MACHINE_FUNCTION_PASS_WITH_PARAMS_ = std::size({
- "",
+static constexpr bool HAVE_MACHINE_FUNCTION_PASS_WITH_PARAMS_ =
+ std::array{
+ "",
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
PARAMS) \
NAME,
#include GET_PASS_REGISTRY
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
- }) > 1;
+ }
+ .size() > 1;
if (PopulateClassToPassNames) {
auto *PIC = PB.getPassInstrumentationCallbacks();
@@ -300,5 +328,8 @@ if constexpr (HAVE_MACHINE_FUNCTION_ANALYSIS_) {
#undef MACHINE_FUNCTION_ANALYSIS
});
}
+
+}
+
#undef GET_PASS_REGISTRY
#endif // GET_PASS_REGISTRY
More information about the llvm-commits mailing list