[clang-tools-extra] [clang] [llvm] Move ExpandMemCmp and MergeIcmp to the middle end (PR #77370)
Gabriel Baraldi via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 17 09:58:33 PST 2024
https://github.com/gbaraldi updated https://github.com/llvm/llvm-project/pull/77370
>From 4440a91823cf878ea0dec29fb7d511a25f4333c0 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Mon, 8 Jan 2024 17:04:08 -0300
Subject: [PATCH 01/11] Move ExpandMemCmp and MergeICmps to the middle end to
allow for better optimization of the inline expansions
---
.../include/llvm/CodeGen/CodeGenPassBuilder.h | 10 -
.../llvm/CodeGen/MachinePassRegistry.def | 2 -
llvm/include/llvm/CodeGen/Passes.h | 2 -
llvm/include/llvm/InitializePasses.h | 1 -
llvm/include/llvm/LinkAllPasses.h | 1 -
.../Scalar}/ExpandMemCmp.h | 6 +-
llvm/lib/CodeGen/CMakeLists.txt | 1 -
llvm/lib/CodeGen/CodeGen.cpp | 1 -
llvm/lib/CodeGen/TargetPassConfig.cpp | 11 -
llvm/lib/Passes/PassBuilder.cpp | 2 +-
llvm/lib/Passes/PassBuilderPipelines.cpp | 6 +
llvm/lib/Passes/PassRegistry.def | 3 +-
llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 +
.../Scalar}/ExpandMemCmp.cpp | 133 +-
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 7 -
.../test/CodeGen/AArch64/bcmp-inline-small.ll | 98 -
llvm/test/CodeGen/AArch64/bcmp.ll | 537 -
.../test/CodeGen/AArch64/dag-combine-setcc.ll | 31 +-
.../AArch64/machine-licm-hoist-load.ll | 128 +-
llvm/test/CodeGen/AArch64/memcmp.ll | 3029 ---
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 28 -
llvm/test/CodeGen/ARM/O3-pipeline.ll | 7 -
llvm/test/CodeGen/BPF/memcmp.ll | 77 -
llvm/test/CodeGen/Generic/llc-start-stop.ll | 6 +-
llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 9 +-
llvm/test/CodeGen/M68k/pipeline.ll | 7 -
llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 9 +-
.../memCmpUsedInZeroEqualityComparison.ll | 168 -
.../CodeGen/PowerPC/memcmp-mergeexpand.ll | 39 -
llvm/test/CodeGen/PowerPC/memcmp.ll | 62 -
llvm/test/CodeGen/PowerPC/memcmpIR.ll | 178 -
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 9 +-
llvm/test/CodeGen/X86/memcmp-mergeexpand.ll | 49 -
llvm/test/CodeGen/X86/memcmp-minsize-x32.ll | 445 -
llvm/test/CodeGen/X86/memcmp-minsize.ll | 433 -
.../CodeGen/X86/memcmp-more-load-pairs-x32.ll | 2911 ---
.../CodeGen/X86/memcmp-more-load-pairs.ll | 4006 ---
llvm/test/CodeGen/X86/memcmp-optsize-x32.ll | 583 -
llvm/test/CodeGen/X86/memcmp-optsize.ll | 596 -
llvm/test/CodeGen/X86/memcmp-pgso-x32.ll | 600 -
llvm/test/CodeGen/X86/memcmp-pgso.ll | 613 -
llvm/test/CodeGen/X86/memcmp-x32.ll | 2429 --
llvm/test/CodeGen/X86/memcmp.ll | 3065 ---
llvm/test/CodeGen/X86/opt-pipeline.ll | 9 +-
llvm/test/Other/new-pm-defaults.ll | 4 +-
.../Other/new-pm-thinlto-postlink-defaults.ll | 4 +-
.../new-pm-thinlto-postlink-pgo-defaults.ll | 4 +-
...-pm-thinlto-postlink-samplepgo-defaults.ll | 4 +-
.../Other/new-pm-thinlto-prelink-defaults.ll | 4 +-
.../new-pm-thinlto-prelink-pgo-defaults.ll | 26 +-
...w-pm-thinlto-prelink-samplepgo-defaults.ll | 4 +-
.../Transforms/ExpandMemCmp/AArch64/bcmp.ll | 751 +
.../ExpandMemCmp/AArch64/memcmp-extra.ll | 3434 +++
.../Transforms/ExpandMemCmp/AArch64/memcmp.ll | 1 -
.../Transforms/ExpandMemCmp/BPF/lit.local.cfg | 4 +
.../Transforms/ExpandMemCmp/BPF/memcmp.ll | 119 +
.../ExpandMemCmp/PowerPC/lit.local.cfg | 2 +
.../memCmpUsedInZeroEqualityComparison.ll | 218 +
.../PowerPC/memcmp-mergeexpand.ll | 48 +
.../Transforms/ExpandMemCmp/PowerPC/memcmp.ll | 70 +
.../ExpandMemCmp/PowerPC/memcmpIR.ll | 216 +
llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll | 16 +-
.../Transforms/ExpandMemCmp/X86/memcmp-2.ll | 20249 ++++++++++++++++
.../ExpandMemCmp}/X86/memcmp-constant.ll | 89 +-
.../ExpandMemCmp/X86/memcmp-minsize-x32.ll | 493 +
.../ExpandMemCmp/X86/memcmp-minsize.ll | 707 +
.../X86/memcmp-more-load-pairs-x32.ll | 6203 +++++
.../X86/memcmp-more-load-pairs.ll | 18833 ++++++++++++++
.../ExpandMemCmp/X86/memcmp-nobuiltin.ll | 248 +
.../ExpandMemCmp/X86/memcmp-optsize-x32.ll | 870 +
.../ExpandMemCmp/X86/memcmp-optsize.ll | 1414 ++
.../ExpandMemCmp/X86/memcmp-pgso-x32.ll | 887 +
.../ExpandMemCmp/X86/memcmp-pgso.ll | 1347 +
.../ExpandMemCmp/X86/memcmp-x32-2.ll | 4813 ++++
.../Transforms/ExpandMemCmp/X86/memcmp-x32.ll | 523 +-
.../Transforms/ExpandMemCmp/X86/memcmp.ll | 1194 +-
.../PhaseOrdering/PowerPC/lit.local.cfg | 2 +
.../PhaseOrdering/X86/memcmp-early.ll | 86 +
.../PhaseOrdering/X86/memcmp-mergeexpand.ll | 62 +
.../Transforms/PhaseOrdering/X86/memcmp.ll | 856 +
llvm/tools/opt/opt.cpp | 1 -
.../gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 -
.../llvm/lib/Transforms/Scalar/BUILD.gn | 1 +
83 files changed, 63081 insertions(+), 21075 deletions(-)
rename llvm/include/llvm/{CodeGen => Transforms/Scalar}/ExpandMemCmp.h (83%)
rename llvm/lib/{CodeGen => Transforms/Scalar}/ExpandMemCmp.cpp (90%)
delete mode 100644 llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
delete mode 100644 llvm/test/CodeGen/AArch64/bcmp.ll
delete mode 100644 llvm/test/CodeGen/AArch64/memcmp.ll
delete mode 100644 llvm/test/CodeGen/BPF/memcmp.ll
delete mode 100644 llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
delete mode 100644 llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
delete mode 100644 llvm/test/CodeGen/PowerPC/memcmp.ll
delete mode 100644 llvm/test/CodeGen/PowerPC/memcmpIR.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-minsize-x32.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-minsize.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-optsize.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-pgso.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp-x32.ll
delete mode 100644 llvm/test/CodeGen/X86/memcmp.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg
create mode 100644 llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg
create mode 100644 llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll
rename llvm/test/{CodeGen => Transforms/ExpandMemCmp}/X86/memcmp-constant.ll (50%)
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll
create mode 100644 llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll
create mode 100644 llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg
create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll
create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll
create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index a7cbb0910baabf..556304231b397b 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -25,7 +25,6 @@
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/DwarfEHPrepare.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -629,15 +628,6 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
- if (getOptLevel() != CodeGenOptLevel::None) {
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!Opt.DisableMergeICmps)
- addPass(MergeICmpsPass());
- addPass(ExpandMemCmpPass(&TM));
- }
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index f950dfae7e338b..3c00668aae3897 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -47,7 +47,6 @@ FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass, (TM))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, (TM))
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, (TM))
-FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass, (TM))
FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, (TM))
@@ -55,7 +54,6 @@ FUNCTION_PASS("interleaved-access", InterleavedAccessPass, (TM))
FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass, (TM))
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
-FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index ca9fbb1def7624..e5ed5f15f62ed7 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -519,8 +519,6 @@ namespace llvm {
// Expands large div/rem instructions.
FunctionPass *createExpandLargeFpConvertPass();
- // This pass expands memcmp() to load/stores.
- FunctionPass *createExpandMemCmpLegacyPass();
/// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp
FunctionPass *createBreakFalseDeps();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 46b1e95c3c15f3..b0ca9fa942cda3 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -103,7 +103,6 @@ void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
-void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 7a21876e565a7c..9aff428fbe938b 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -119,7 +119,6 @@ namespace {
(void) llvm::createPostDomTree();
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandLargeDivRemPass();
- (void)llvm::createExpandMemCmpLegacyPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
llvm::raw_string_ostream os(buf);
diff --git a/llvm/include/llvm/CodeGen/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
similarity index 83%
rename from llvm/include/llvm/CodeGen/ExpandMemCmp.h
rename to llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
index 94a877854f327a..94ba0cf9305040 100644
--- a/llvm/include/llvm/CodeGen/ExpandMemCmp.h
+++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_EXPANDMEMCMP_H
-#define LLVM_CODEGEN_EXPANDMEMCMP_H
+#ifndef LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
+#define LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
#include "llvm/IR/PassManager.h"
@@ -26,4 +26,4 @@ class ExpandMemCmpPass : public PassInfoMixin<ExpandMemCmpPass> {
} // namespace llvm
-#endif // LLVM_CODEGEN_EXPANDMEMCMP_H
+#endif // LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index df2d1831ee5fdb..518432e9a7b32f 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -71,7 +71,6 @@ add_llvm_component_library(LLVMCodeGen
ExecutionDomainFix.cpp
ExpandLargeDivRem.cpp
ExpandLargeFpConvert.cpp
- ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
ExpandVectorPredication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 7b73a7b11ddf1c..043fa4e6eabe8f 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -41,7 +41,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyTailDuplicatePass(Registry);
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
- initializeExpandMemCmpLegacyPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4003a08a5422dd..33562e90e94426 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -108,9 +108,6 @@ static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
- cl::desc("Disable MergeICmps Pass"),
- cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool>
@@ -487,7 +484,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
SET_BOOLEAN_OPTION(EnableMachineOutliner)
SET_BOOLEAN_OPTION(MISchedPostRA)
- SET_BOOLEAN_OPTION(DisableMergeICmps)
SET_BOOLEAN_OPTION(DisableLSR)
SET_BOOLEAN_OPTION(DisableConstantHoisting)
SET_BOOLEAN_OPTION(DisableCGP)
@@ -872,13 +868,6 @@ void TargetPassConfig::addIRPasses() {
"\n\n*** Code after LSR ***\n"));
}
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!DisableMergeICmps)
- addPass(createMergeICmpsLegacyPass());
- addPass(createExpandMemCmpLegacyPass());
}
// Run GC lowering passes for builtin collectors
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 439f749bda8bb7..20448554756aca 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -76,7 +76,6 @@
#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/CodeGen/ExpandLargeDivRem.h"
#include "llvm/CodeGen/ExpandLargeFpConvert.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -181,6 +180,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 5c6c391049a7b2..e2dd413f12d696 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -86,6 +86,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
@@ -111,6 +112,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
@@ -386,6 +388,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -532,6 +536,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82ce040c649626..31adbf1942b410 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -353,6 +353,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("memprof", MemProfilerPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
+FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
@@ -415,7 +416,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("typepromotion", TypePromotionPass(TM))
FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 2dd27037a17de7..f6e666dd071256 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMScalarOpts
DeadStoreElimination.cpp
DFAJumpThreading.cpp
DivRemPairs.cpp
+ ExpandMemCmp.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
Float2Int.cpp
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
similarity index 90%
rename from llvm/lib/CodeGen/ExpandMemCmp.cpp
rename to llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index bb84813569f4d5..973875ee142978 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -11,21 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ExpandMemCmp.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -35,9 +36,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-namespace llvm {
-class TargetLowering;
-}
#define DEBUG_TYPE "expand-memcmp"
@@ -305,6 +303,7 @@ unsigned MemCmpExpansion::getNumBlocks() {
}
void MemCmpExpansion::createLoadCmpBlocks() {
+ assert(ResBlock.BB && "ResBlock must be created before LoadCmpBlocks");
for (unsigned i = 0; i < getNumBlocks(); i++) {
BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
EndBlock->getParent(), EndBlock);
@@ -313,6 +312,7 @@ void MemCmpExpansion::createLoadCmpBlocks() {
}
void MemCmpExpansion::createResultBlock() {
+ assert(EndBlock && "EndBlock must be created before ResultBlock");
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);
}
@@ -828,9 +828,9 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU, const bool IsBCmp) {
+ const DataLayout *DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU,
+ const bool IsBCmp) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -845,9 +845,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
const uint64_t SizeVal = SizeCast->getZExtValue();
- if (SizeVal == 0) {
- return false;
- }
+
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp =
@@ -857,28 +855,33 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
+ Value *Res = nullptr;
- if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
- Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
-
- if (OptForSize &&
- MaxLoadsPerMemcmpOptSize.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
+ if (SizeVal == 0) {
+ Res = ConstantInt::get(CI->getFunctionType()->getReturnType(), 0);
+ } else {
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmp;
+ if (OptForSize &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
- // Don't expand if this will require more loads than desired by the target.
- if (Expansion.getNumLoads() == 0) {
- NumMemCmpGreaterThanMax++;
- return false;
- }
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
- NumMemCmpInlined++;
+ // Don't expand if this will require more loads than desired by the target.
+ if (Expansion.getNumLoads() == 0) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
- if (Value *Res = Expansion.getMemCmpExpansion()) {
+ NumMemCmpInlined++;
+ Res = Expansion.getMemCmpExpansion();
+ }
+ if (Res) {
// Replace call with result of expansion and erase call.
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
@@ -889,62 +892,18 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering *TL,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT);
-class ExpandMemCmpLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- ExpandMemCmpLegacyPass() : FunctionPass(ID) {
- initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F)) return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC) {
- return false;
- }
- const TargetLowering* TL =
- TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
-
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
- DominatorTree *DT = nullptr;
- if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DT = &DTWP->getDomTree();
- auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
- return !PA.areAllPreserved();
- }
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- FunctionPass::getAnalysisUsage(AU);
- }
-};
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
for (Instruction &I : BB) {
@@ -955,7 +914,7 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
+ expandMemCmp(CI, TTI, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
return true;
}
}
@@ -963,8 +922,7 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ const TargetTransformInfo *TTI, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT) {
std::optional<DomTreeUpdater> DTU;
if (DT)
@@ -973,7 +931,7 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
- if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
+ if (runOnBlock(*BBIt, TLI, TTI, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
@@ -996,7 +954,6 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
PreservedAnalyses ExpandMemCmpPass::run(Function &F,
FunctionAnalysisManager &FAM) {
- const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
const auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
auto *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
@@ -1005,21 +962,5 @@ PreservedAnalyses ExpandMemCmpPass::run(Function &F,
? &FAM.getResult<BlockFrequencyAnalysis>(F)
: nullptr;
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
-
- return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
-}
-
-char ExpandMemCmpLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
- "Expand memcmp() to load/stores", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
- "Expand memcmp() to load/stores", false, false)
-
-FunctionPass *llvm::createExpandMemCmpLegacyPass() {
- return new ExpandMemCmpLegacyPass();
+ return runImpl(F, &TLI, &TTI, PSI, BFI, DT);
}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 638f26298ee26a..c96c1edebaf8cc 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -43,13 +43,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
deleted file mode 100644
index 4846c46e648178..00000000000000
--- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ /dev/null
@@ -1,98 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECKN
-; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu -mattr=strict-align | FileCheck %s --check-prefix=CHECKS
-
-declare i32 @bcmp(ptr, ptr, i64) nounwind readonly
-declare i32 @memcmp(ptr, ptr, i64) nounwind readonly
-
-define i1 @test_b2(ptr %s1, ptr %s2) {
-; CHECKN-LABEL: test_b2:
-; CHECKN: // %bb.0: // %entry
-; CHECKN-NEXT: ldr x8, [x0]
-; CHECKN-NEXT: ldr x9, [x1]
-; CHECKN-NEXT: ldur x10, [x0, #7]
-; CHECKN-NEXT: ldur x11, [x1, #7]
-; CHECKN-NEXT: cmp x8, x9
-; CHECKN-NEXT: ccmp x10, x11, #0, eq
-; CHECKN-NEXT: cset w0, eq
-; CHECKN-NEXT: ret
-;
-; CHECKS-LABEL: test_b2:
-; CHECKS: // %bb.0: // %entry
-; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECKS-NEXT: .cfi_def_cfa_offset 16
-; CHECKS-NEXT: .cfi_offset w30, -16
-; CHECKS-NEXT: mov w2, #15 // =0xf
-; CHECKS-NEXT: bl bcmp
-; CHECKS-NEXT: cmp w0, #0
-; CHECKS-NEXT: cset w0, eq
-; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECKS-NEXT: ret
-entry:
- %bcmp = call i32 @bcmp(ptr %s1, ptr %s2, i64 15)
- %ret = icmp eq i32 %bcmp, 0
- ret i1 %ret
-}
-
-; TODO: Four loads should be within the limit, but the heuristic isn't implemented.
-define i1 @test_b2_align8(ptr align 8 %s1, ptr align 8 %s2) {
-; CHECKN-LABEL: test_b2_align8:
-; CHECKN: // %bb.0: // %entry
-; CHECKN-NEXT: ldr x8, [x0]
-; CHECKN-NEXT: ldr x9, [x1]
-; CHECKN-NEXT: ldur x10, [x0, #7]
-; CHECKN-NEXT: ldur x11, [x1, #7]
-; CHECKN-NEXT: cmp x8, x9
-; CHECKN-NEXT: ccmp x10, x11, #0, eq
-; CHECKN-NEXT: cset w0, eq
-; CHECKN-NEXT: ret
-;
-; CHECKS-LABEL: test_b2_align8:
-; CHECKS: // %bb.0: // %entry
-; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECKS-NEXT: .cfi_def_cfa_offset 16
-; CHECKS-NEXT: .cfi_offset w30, -16
-; CHECKS-NEXT: mov w2, #15 // =0xf
-; CHECKS-NEXT: bl bcmp
-; CHECKS-NEXT: cmp w0, #0
-; CHECKS-NEXT: cset w0, eq
-; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECKS-NEXT: ret
-entry:
- %bcmp = call i32 @bcmp(ptr %s1, ptr %s2, i64 15)
- %ret = icmp eq i32 %bcmp, 0
- ret i1 %ret
-}
-
-define i1 @test_bs(ptr %s1, ptr %s2) optsize {
-; CHECKN-LABEL: test_bs:
-; CHECKN: // %bb.0: // %entry
-; CHECKN-NEXT: ldp x8, x11, [x1]
-; CHECKN-NEXT: ldr x12, [x0, #16]
-; CHECKN-NEXT: ldp x9, x10, [x0]
-; CHECKN-NEXT: ldr x13, [x1, #16]
-; CHECKN-NEXT: cmp x9, x8
-; CHECKN-NEXT: ldur x8, [x0, #23]
-; CHECKN-NEXT: ldur x9, [x1, #23]
-; CHECKN-NEXT: ccmp x10, x11, #0, eq
-; CHECKN-NEXT: ccmp x12, x13, #0, eq
-; CHECKN-NEXT: ccmp x8, x9, #0, eq
-; CHECKN-NEXT: cset w0, eq
-; CHECKN-NEXT: ret
-;
-; CHECKS-LABEL: test_bs:
-; CHECKS: // %bb.0: // %entry
-; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECKS-NEXT: .cfi_def_cfa_offset 16
-; CHECKS-NEXT: .cfi_offset w30, -16
-; CHECKS-NEXT: mov w2, #31 // =0x1f
-; CHECKS-NEXT: bl memcmp
-; CHECKS-NEXT: cmp w0, #0
-; CHECKS-NEXT: cset w0, eq
-; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECKS-NEXT: ret
-entry:
- %memcmp = call i32 @memcmp(ptr %s1, ptr %s2, i64 31)
- %ret = icmp eq i32 %memcmp, 0
- ret i1 %ret
-}
diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll
deleted file mode 100644
index fee52ead989629..00000000000000
--- a/llvm/test/CodeGen/AArch64/bcmp.ll
+++ /dev/null
@@ -1,537 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s
-
-declare i32 @bcmp(ptr, ptr, i64)
-
-define i1 @bcmp0(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 0)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp1(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: ldrb w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 1)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp2(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 2)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-; or (and (xor a, b), C1), (and (xor c, d), C2)
-define i1 @bcmp3(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #2]
-; CHECK-NEXT: ldrb w11, [x1, #2]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp4(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 4)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-; or (xor a, b), (and (xor c, d), C2)
-define i1 @bcmp5(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp5:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #4]
-; CHECK-NEXT: ldrb w11, [x1, #4]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-; or (xor a, b), (and (xor c, d), C2)
-define i1 @bcmp6(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp6:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: ldrh w10, [x0, #4]
-; CHECK-NEXT: ldrh w11, [x1, #4]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 6)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-; or (xor a, b), (xor c, d)
-define i1 @bcmp7(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp7:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: ldur w10, [x0, #3]
-; CHECK-NEXT: ldur w11, [x1, #3]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 7)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp8(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 8)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-; or (xor a, b), (and (xor c, d), C2)
-define i1 @bcmp9(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp9:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #8]
-; CHECK-NEXT: ldrb w11, [x1, #8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp10(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp10:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldrh w10, [x0, #8]
-; CHECK-NEXT: ldrh w11, [x1, #8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp11(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp11:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #3]
-; CHECK-NEXT: ldur x11, [x1, #3]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 11)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp12(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp12:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldr w10, [x0, #8]
-; CHECK-NEXT: ldr w11, [x1, #8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp13(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp13:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #5]
-; CHECK-NEXT: ldur x11, [x1, #5]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 13)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp14(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp14:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #6]
-; CHECK-NEXT: ldur x11, [x1, #6]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 14)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp15(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp15:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #7]
-; CHECK-NEXT: ldur x11, [x1, #7]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 15)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp16(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp20(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp20:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldr w12, [x0, #16]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldr w13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x13, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp24(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp24:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldr x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x13, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp28(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp28:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldr x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldr w8, [x0, #24]
-; CHECK-NEXT: ldr w9, [x1, #24]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x13, #0, eq
-; CHECK-NEXT: ccmp x8, x9, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp33(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp33:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ldrb w10, [x0, #32]
-; CHECK-NEXT: ldrb w11, [x1, #32]
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp38(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp38:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ldur x10, [x0, #30]
-; CHECK-NEXT: ldur x11, [x1, #30]
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp45(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp45:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ldr x10, [x0, #32]
-; CHECK-NEXT: ldr x11, [x1, #32]
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldur x8, [x0, #37]
-; CHECK-NEXT: ldur x12, [x1, #37]
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-; Although the large cmp chain may be not profitable on high end CPU, we
-; believe it is better on most cpus, so perform the transform now.
-; 8 xor + 7 or + 1 cmp only need 6 cycles on a 4 width ALU port machine
-; 2 cycle for xor
-; 3 cycle for or
-; 1 cycle for cmp
-define i1 @bcmp64(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldp x8, x11, [x0, #32]
-; CHECK-NEXT: ldp x10, x12, [x1, #32]
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ldp x9, x13, [x1, #48]
-; CHECK-NEXT: ccmp x8, x10, #0, eq
-; CHECK-NEXT: ldp x8, x10, [x0, #48]
-; CHECK-NEXT: ccmp x11, x12, #0, eq
-; CHECK-NEXT: ccmp x8, x9, #0, eq
-; CHECK-NEXT: ccmp x10, x13, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp89(ptr %a, ptr %b) {
-; CHECK-LABEL: bcmp89:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov w2, #89 // =0x59
-; CHECK-NEXT: bl bcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %cr = call i32 @bcmp(ptr %a, ptr %b, i64 89)
- %r = icmp eq i32 %cr, 0
- ret i1 %r
-}
-
-define i1 @bcmp_zext(i32 %0, i32 %1, i8 %2, i8 %3) {
-; CHECK-LABEL: bcmp_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w2, #0xff
-; CHECK-NEXT: and w9, w3, #0xff
-; CHECK-NEXT: cmp w1, w0
-; CHECK-NEXT: ccmp w9, w8, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %5 = xor i32 %1, %0
- %6 = xor i8 %3, %2
- %7 = zext i8 %6 to i32
- %8 = or i32 %5, %7
- %9 = icmp eq i32 %8, 0
- ret i1 %9
-}
-
-define i1 @bcmp_i8(i8 %a0, i8 %b0, i8 %a1, i8 %b1, i8 %a2, i8 %b2) {
-; CHECK-LABEL: bcmp_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: and w9, w2, #0xff
-; CHECK-NEXT: and w10, w3, #0xff
-; CHECK-NEXT: cmp w8, w0, uxtb
-; CHECK-NEXT: and w8, w4, #0xff
-; CHECK-NEXT: and w11, w5, #0xff
-; CHECK-NEXT: ccmp w10, w9, #0, eq
-; CHECK-NEXT: ccmp w11, w8, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %xor0 = xor i8 %b0, %a0
- %xor1 = xor i8 %b1, %a1
- %xor2 = xor i8 %b2, %a2
- %or0 = or i8 %xor0, %xor1
- %or1 = or i8 %or0, %xor2
- %r = icmp eq i8 %or1, 0
- ret i1 %r
-}
-
-define i1 @bcmp_i16(i16 %a0, i16 %b0, i16 %a1, i16 %b1, i16 %a2, i16 %b2) {
-; CHECK-LABEL: bcmp_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: and w9, w2, #0xffff
-; CHECK-NEXT: and w10, w3, #0xffff
-; CHECK-NEXT: cmp w8, w0, uxth
-; CHECK-NEXT: and w8, w4, #0xffff
-; CHECK-NEXT: and w11, w5, #0xffff
-; CHECK-NEXT: ccmp w10, w9, #0, eq
-; CHECK-NEXT: ccmp w11, w8, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %xor0 = xor i16 %b0, %a0
- %xor1 = xor i16 %b1, %a1
- %xor2 = xor i16 %b2, %a2
- %or0 = or i16 %xor0, %xor1
- %or1 = or i16 %or0, %xor2
- %r = icmp eq i16 %or1, 0
- ret i1 %r
-}
-
-define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) {
-; CHECK-LABEL: bcmp_i128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x2, x0
-; CHECK-NEXT: ldp x8, x10, [sp]
-; CHECK-NEXT: ccmp x3, x1, #0, eq
-; CHECK-NEXT: ldp x9, x11, [sp, #16]
-; CHECK-NEXT: ccmp x6, x4, #0, eq
-; CHECK-NEXT: ccmp x7, x5, #0, eq
-; CHECK-NEXT: cset w12, ne
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ccmp x11, x10, #0, eq
-; CHECK-NEXT: csinc w0, w12, wzr, eq
-; CHECK-NEXT: ret
- %xor0 = xor i128 %b0, %a0
- %xor1 = xor i128 %b1, %a1
- %xor2 = xor i128 %b2, %a2
- %or0 = or i128 %xor0, %xor1
- %or1 = or i128 %or0, %xor2
- %r = icmp ne i128 %or1, 0
- ret i1 %r
-}
-
-define i1 @bcmp_i42(i42 %a0, i42 %b0, i42 %a1, i42 %b1, i42 %a2, i42 %b2) {
-; CHECK-LABEL: bcmp_i42:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3ffffffffff
-; CHECK-NEXT: and x9, x1, #0x3ffffffffff
-; CHECK-NEXT: and x10, x2, #0x3ffffffffff
-; CHECK-NEXT: and x11, x3, #0x3ffffffffff
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: and x8, x4, #0x3ffffffffff
-; CHECK-NEXT: and x9, x5, #0x3ffffffffff
-; CHECK-NEXT: ccmp x11, x10, #0, eq
-; CHECK-NEXT: ccmp x9, x8, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %xor0 = xor i42 %b0, %a0
- %xor1 = xor i42 %b1, %a1
- %xor2 = xor i42 %b2, %a2
- %or0 = or i42 %xor0, %xor1
- %or1 = or i42 %or0, %xor2
- %r = icmp ne i42 %or1, 0
- ret i1 %r
-}
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
index a48a4e0e723ebc..855a5b23f6c1cc 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) {
@@ -266,8 +266,18 @@ define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {
; CHECK-NEXT: ccmp x10, x11, #0, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
- %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16)
- %cmp = icmp eq i32 %bcmp, 0
+ %1 = load i64, ptr %a, align 1
+ %2 = load i64, ptr %b, align 1
+ %3 = xor i64 %1, %2
+ %4 = getelementptr i8, ptr %a, i64 8
+ %5 = getelementptr i8, ptr %b, i64 8
+ %6 = load i64, ptr %4, align 1
+ %7 = load i64, ptr %5, align 1
+ %8 = xor i64 %6, %7
+ %9 = or i64 %3, %8
+ %10 = icmp ne i64 %9, 0
+ %11 = zext i1 %10 to i32
+ %cmp = icmp eq i32 %11, 0
ret i1 %cmp
}
@@ -280,9 +290,18 @@ define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) {
; CHECK-NEXT: ccmp x10, x11, #0, eq
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
- %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16)
- %cmp = icmp ne i32 %bcmp, 0
- ret i1 %cmp
+ %1 = load i64, ptr %a, align 1
+ %2 = load i64, ptr %b, align 1
+ %3 = xor i64 %1, %2
+ %4 = getelementptr i8, ptr %a, i64 8
+ %5 = getelementptr i8, ptr %b, i64 8
+ %6 = load i64, ptr %4, align 1
+ %7 = load i64, ptr %5, align 1
+ %8 = xor i64 %6, %7
+ %9 = or i64 %3, %8
+ %10 = icmp ne i64 %9, 0
+ %11 = zext i1 %10 to i32
+ ret i1 %10
}
; Doesn't increase the number of instructions, where the LHS has multiple uses
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index 30123a31cebbe9..fc0bc1b9661163 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -25,20 +25,23 @@ define i64 @one_dimensional(ptr %a, ptr %b, i64 %N) {
entry:
br label %for.body
-for.body: ; preds = %entry, %for.body
+for.body: ; preds = %for.body, %entry
%i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
%0 = load ptr, ptr %arrayidx, align 8
- %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4)
- %tobool = icmp eq i32 %bcmp, 0
+ %1 = load i32, ptr %0, align 1
+ %2 = load i32, ptr %b, align 1
+ %3 = icmp ne i32 %1, %2
+ %4 = zext i1 %3 to i32
+ %tobool = icmp eq i32 %4, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.05, %add
%inc = add nuw i64 %i.06, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.exit, label %for.body
-for.exit: ; preds = %for.body
+for.exit: ; preds = %for.body
ret i64 %spec.select
}
@@ -79,32 +82,35 @@ define i64 @two_dimensional(ptr %a, ptr %b, i64 %N, i64 %M) {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %entry, %for.cond1.for.exit3_crit_edge
+for.cond1.preheader: ; preds = %for.cond1.for.exit3_crit_edge, %entry
%i.019 = phi i64 [ %inc7, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ]
%sum.018 = phi i64 [ %spec.select, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.019
%0 = load ptr, ptr %arrayidx, align 8
br label %for.body4
-for.body4: ; preds = %for.cond1.preheader, %for.body4
+for.body4: ; preds = %for.body4, %for.cond1.preheader
%j.016 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
%sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ]
%arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016
%1 = load ptr, ptr %arrayidx5, align 8
- %bcmp = tail call i32 @bcmp(ptr %1, ptr %b, i64 4)
- %tobool = icmp eq i32 %bcmp, 0
+ %2 = load i32, ptr %1, align 1
+ %3 = load i32, ptr %b, align 1
+ %4 = icmp ne i32 %2, %3
+ %5 = zext i1 %4 to i32
+ %tobool = icmp eq i32 %5, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.115, %add
%inc = add nuw i64 %j.016, 1
%exitcond = icmp eq i64 %inc, %M
br i1 %exitcond, label %for.cond1.for.exit3_crit_edge, label %for.body4
-for.cond1.for.exit3_crit_edge: ; preds = %for.body4
+for.cond1.for.exit3_crit_edge: ; preds = %for.body4
%inc7 = add nuw i64 %i.019, 1
%exitcond22 = icmp eq i64 %inc7, %N
br i1 %exitcond22, label %for.exit, label %for.cond1.preheader
-for.exit: ; preds = %for.cond1.for.exit3_crit_edge
+for.exit: ; preds = %for.cond1.for.exit3_crit_edge
ret i64 %spec.select
}
@@ -159,44 +165,47 @@ define i64 @three_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond
+for.cond1.preheader: ; preds = %for.cond1.for.cond, %entry
%i.033 = phi i64 [ %inc15, %for.cond1.for.cond ], [ 0, %entry ]
%sum.032 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.033
%0 = load ptr, ptr %arrayidx, align 8
br label %for.cond5.preheader
-for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
+for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
%j.029 = phi i64 [ 0, %for.cond1.preheader ], [ %inc12, %for.cond5.for.cond ]
%sum.128 = phi i64 [ %sum.032, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ]
%arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.029
%1 = load ptr, ptr %arrayidx9, align 8
br label %for.body8
-for.body8: ; preds = %for.body8, %for.cond5.preheader
+for.body8: ; preds = %for.body8, %for.cond5.preheader
%k.026 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ]
%sum.225 = phi i64 [ %sum.128, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
%arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.026
%2 = load ptr, ptr %arrayidx10, align 8
- %bcmp = tail call i32 @bcmp(ptr %2, ptr %b, i64 4)
- %tobool = icmp eq i32 %bcmp, 0
+ %3 = load i32, ptr %2, align 1
+ %4 = load i32, ptr %b, align 1
+ %5 = icmp ne i32 %3, %4
+ %6 = zext i1 %5 to i32
+ %tobool = icmp eq i32 %6, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.225, %add
%inc = add nuw i64 %k.026, 1
%exitcond = icmp eq i64 %inc, %K
br i1 %exitcond, label %for.cond5.for.cond, label %for.body8
-for.cond5.for.cond: ; preds = %for.body8
+for.cond5.for.cond: ; preds = %for.body8
%inc12 = add nuw i64 %j.029, 1
%exitcond44 = icmp eq i64 %inc12, %M
br i1 %exitcond44, label %for.cond1.for.cond, label %for.cond5.preheader
-for.cond1.for.cond: ; preds = %for.cond5.for.cond
+for.cond1.for.cond: ; preds = %for.cond5.for.cond
%inc15 = add nuw i64 %i.033, 1
%exitcond45 = icmp eq i64 %inc15, %N
br i1 %exitcond45, label %for.exit, label %for.cond1.preheader
-for.exit: ; preds = %for.cond1.for.cond
+for.exit: ; preds = %for.cond1.for.cond
ret i64 %spec.select
}
@@ -254,14 +263,14 @@ define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond
+for.cond1.preheader: ; preds = %for.cond1.for.cond, %entry
%i.035 = phi i64 [ %inc16, %for.cond1.for.cond ], [ 0, %entry ]
%sum.034 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.035
%0 = load ptr, ptr %arrayidx, align 8
br label %for.cond5.preheader
-for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
+for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
%j.031 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.cond5.for.cond ]
%sum.130 = phi i64 [ %sum.034, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ]
%arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.031
@@ -270,30 +279,33 @@ for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.con
%2 = load ptr, ptr %arrayidx11, align 8
br label %for.body8
-for.body8: ; preds = %for.body8, %for.cond5.preheader
+for.body8: ; preds = %for.body8, %for.cond5.preheader
%k.028 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ]
%sum.227 = phi i64 [ %sum.130, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
%arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.028
%3 = load ptr, ptr %arrayidx10, align 8
- %bcmp = tail call i32 @bcmp(ptr %3, ptr %2, i64 4)
- %tobool = icmp eq i32 %bcmp, 0
+ %4 = load i32, ptr %3, align 1
+ %5 = load i32, ptr %2, align 1
+ %6 = icmp ne i32 %4, %5
+ %7 = zext i1 %6 to i32
+ %tobool = icmp eq i32 %7, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.227, %add
%inc = add nuw i64 %k.028, 1
%exitcond = icmp eq i64 %inc, %K
br i1 %exitcond, label %for.cond5.for.cond, label %for.body8
-for.cond5.for.cond: ; preds = %for.body8
+for.cond5.for.cond: ; preds = %for.body8
%inc13 = add nuw i64 %j.031, 1
%exitcond46 = icmp eq i64 %inc13, %M
br i1 %exitcond46, label %for.cond1.for.cond, label %for.cond5.preheader
-for.cond1.for.cond: ; preds = %for.cond5.for.cond
+for.cond1.for.cond: ; preds = %for.cond5.for.cond
%inc16 = add nuw i64 %i.035, 1
%exitcond47 = icmp eq i64 %inc16, %N
br i1 %exitcond47, label %for.exit, label %for.cond1.preheader
-for.exit: ; preds = %for.cond1.for.cond
+for.exit: ; preds = %for.cond1.for.cond
ret i64 %spec.select
}
@@ -328,19 +340,27 @@ for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %N to i64
br label %for.body
-for.body: ; preds = %for.body.preheader, %for.body
+for.body: ; preds = %for.body, %for.body.preheader
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
%0 = load ptr, ptr %arrayidx, align 8
- %call = tail call i32 @memcmp(ptr %0, ptr %b, i64 4)
- %conv = trunc i32 %call to i8
+ %1 = load i32, ptr %0, align 1
+ %2 = load i32, ptr %b, align 1
+ %3 = call i32 @llvm.bswap.i32(i32 %1)
+ %4 = call i32 @llvm.bswap.i32(i32 %2)
+ %5 = icmp ugt i32 %3, %4
+ %6 = icmp ult i32 %3, %4
+ %7 = zext i1 %5 to i32
+ %8 = zext i1 %6 to i32
+ %9 = sub i32 %7, %8
+ %conv = trunc i32 %9 to i8
%arrayidx2 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
store i8 %conv, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.exit, label %for.body
-for.exit: ; preds = %for.body
+for.exit: ; preds = %for.body
ret void
}
@@ -385,13 +405,16 @@ for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %N to i64
br label %for.body
-for.body: ; preds = %for.body.preheader, %for.body
+for.body: ; preds = %for.body, %for.body.preheader
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%sum.05 = phi i32 [ 0, %for.body.preheader ], [ %spec.select, %for.body ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
%0 = load ptr, ptr %arrayidx, align 8
- %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4)
- %tobool.not = icmp eq i32 %bcmp, 0
+ %1 = load i32, ptr %0, align 1
+ %2 = load i32, ptr %b, align 1
+ %3 = icmp ne i32 %1, %2
+ %4 = zext i1 %3 to i32
+ %tobool.not = icmp eq i32 %4, 0
%add = zext i1 %tobool.not to i32
%spec.select = add nuw nsw i32 %sum.05, %add
tail call void @func()
@@ -399,7 +422,7 @@ for.body: ; preds = %for.body.preheader,
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.exit, label %for.body
-for.exit: ; preds = %for.body
+for.exit: ; preds = %for.body
ret i32 %spec.select
}
@@ -431,20 +454,32 @@ define i64 @one_dimensional_two_loads(ptr %a, ptr %b, i64 %N) {
entry:
br label %for.body
-for.body: ; preds = %entry, %for.body
+for.body: ; preds = %for.body, %entry
%i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
%0 = load ptr, ptr %arrayidx, align 8
- %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 6)
- %tobool = icmp eq i32 %bcmp, 0
+ %1 = load i32, ptr %0, align 1
+ %2 = load i32, ptr %b, align 1
+ %3 = xor i32 %1, %2
+ %4 = getelementptr i8, ptr %0, i64 4
+ %5 = getelementptr i8, ptr %b, i64 4
+ %6 = load i16, ptr %4, align 1
+ %7 = load i16, ptr %5, align 1
+ %8 = zext i16 %6 to i32
+ %9 = zext i16 %7 to i32
+ %10 = xor i32 %8, %9
+ %11 = or i32 %3, %10
+ %12 = icmp ne i32 %11, 0
+ %13 = zext i1 %12 to i32
+ %tobool = icmp eq i32 %13, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.05, %add
%inc = add nuw i64 %i.06, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.exit, label %for.body
-for.exit: ; preds = %for.body
+for.exit: ; preds = %for.body
ret i64 %spec.select
}
@@ -475,18 +510,18 @@ define i64 @hoisting_no_cse(ptr %a, ptr %b, ptr %c, i64 %N) {
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
entry:
- %b.val = load i64, ptr %b
+ %b.val = load i64, ptr %b, align 8
%b.val.changed = add i64 %b.val, 1
- store i64 %b.val.changed, ptr %c
+ store i64 %b.val.changed, ptr %c, align 8
br label %for.body
-for.body: ; preds = %entry, %for.body
+for.body: ; preds = %for.body, %entry
%idx = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%sum = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %idx
%0 = load ptr, ptr %arrayidx, align 8
- %x = load i64, ptr %0
- %y = load i64, ptr %b
+ %x = load i64, ptr %0, align 8
+ %y = load i64, ptr %b, align 8
%cmp = icmp eq i64 %x, %y
%add = zext i1 %cmp to i64
%spec.select = add i64 %sum, %add
@@ -494,10 +529,15 @@ for.body: ; preds = %entry, %for.body
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.exit, label %for.body
-for.exit: ; preds = %for.body
+for.exit: ; preds = %for.body
ret i64 %spec.select
}
declare i32 @bcmp(ptr, ptr, i64)
declare i32 @memcmp(ptr, ptr, i64)
declare void @func()
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.bswap.i32(i32) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
deleted file mode 100644
index 4da7c8c95a4e4f..00000000000000
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ /dev/null
@@ -1,3029 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
-
- at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i64)
-
-define i32 @length0(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- ret i32 %m
- }
-
-define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length0_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length0_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length2(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: sub w0, w8, w9, lsr #16
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-define i32 @length2_const(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w9, [x0]
-; CHECK-NEXT: mov w8, #-12594 // =0xffffcece
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: add w0, w8, w9, lsr #16
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_gt_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w9, [x0]
-; CHECK-NEXT: mov w8, #-12594 // =0xffffcece
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: add w8, w8, w9, lsr #16
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: sub w8, w8, w9, lsr #16
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: sub w8, w8, w9, lsr #16
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length2_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: mov w9, #12849 // =0x3231
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_eq_nobuiltin_attr:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #2 // =0x2
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0, #2]
-; CHECK-NEXT: ldrh w9, [x0]
-; CHECK-NEXT: ldrb w10, [x1, #2]
-; CHECK-NEXT: ldrh w11, [x1]
-; CHECK-NEXT: orr w8, w9, w8, lsl #16
-; CHECK-NEXT: orr w9, w11, w10, lsl #16
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length3_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #2]
-; CHECK-NEXT: ldrb w11, [x1, #2]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length4_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length4_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, lo
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length4_lt_32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, lo
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = lshr i32 %m, 31
- ret i32 %c
-}
-
-define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length4_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, hi
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length4_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, #12849 // =0x3231
-; CHECK-NEXT: movk w9, #13363, lsl #16
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length5:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0, #4]
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: ldrb w10, [x1, #4]
-; CHECK-NEXT: ldr w11, [x1]
-; CHECK-NEXT: orr x8, x9, x8, lsl #32
-; CHECK-NEXT: orr x9, x11, x10, lsl #32
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length5_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #4]
-; CHECK-NEXT: ldrb w11, [x1, #4]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length5_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0, #4]
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: ldrb w10, [x1, #4]
-; CHECK-NEXT: ldr w11, [x1]
-; CHECK-NEXT: orr x8, x9, x8, lsl #32
-; CHECK-NEXT: orr x9, x11, x10, lsl #32
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w0, lo
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length6(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length6:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0, #4]
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: ldrh w10, [x1, #4]
-; CHECK-NEXT: ldr w11, [x1]
-; CHECK-NEXT: orr x8, x9, x8, lsl #32
-; CHECK-NEXT: orr x9, x11, x10, lsl #32
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
- ret i32 %m
-}
-
-define i32 @length6_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length6_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0, #4]
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: ldrh w10, [x1, #4]
-; CHECK-NEXT: ldr w11, [x1]
-; CHECK-NEXT: orr x8, x9, x8, lsl #32
-; CHECK-NEXT: orr x9, x11, x10, lsl #32
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w0, lo
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
- %r = lshr i32 %m, 31
- ret i32 %r
-}
-
-define i32 @length7(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length7:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB24_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldur w8, [x0, #3]
-; CHECK-NEXT: ldur w9, [x1, #3]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB24_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB24_3: // %res_block
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- ret i32 %m
-}
-
-define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length7_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB25_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldur w8, [x0, #3]
-; CHECK-NEXT: ldur w9, [x1, #3]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB25_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB25_3: // %res_block
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length7_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: ldur w10, [x0, #3]
-; CHECK-NEXT: ldur w11, [x1, #3]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length8_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length8_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #12592 // =0x3130
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: movk x9, #13106, lsl #16
-; CHECK-NEXT: movk x9, #13620, lsl #32
-; CHECK-NEXT: movk x9, #14134, lsl #48
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length9(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length9:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB30_2
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldrb w8, [x0, #8]
-; CHECK-NEXT: ldrb w9, [x1, #8]
-; CHECK-NEXT: sub w0, w8, w9
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB30_2: // %res_block
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
- ret i32 %m
-}
-
-define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length9_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #8]
-; CHECK-NEXT: ldrb w11, [x1, #8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length10(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length10:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB32_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldrh w8, [x0, #8]
-; CHECK-NEXT: ldrh w9, [x1, #8]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: lsr w9, w9, #16
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB32_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB32_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
- ret i32 %m
-}
-
-define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length10_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldrh w10, [x0, #8]
-; CHECK-NEXT: ldrh w11, [x1, #8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length11(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length11:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB34_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldur x8, [x0, #3]
-; CHECK-NEXT: ldur x9, [x1, #3]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB34_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB34_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
- ret i32 %m
-}
-
-define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length11_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #3]
-; CHECK-NEXT: ldur x11, [x1, #3]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length12_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldr w10, [x0, #8]
-; CHECK-NEXT: ldr w11, [x1, #8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length12:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB37_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr w8, [x0, #8]
-; CHECK-NEXT: ldr w9, [x1, #8]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB37_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB37_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- ret i32 %m
-}
-
-define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length13_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #5]
-; CHECK-NEXT: ldur x11, [x1, #5]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length14_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #6]
-; CHECK-NEXT: ldur x11, [x1, #6]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length15(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length15:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB40_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldur x8, [x0, #7]
-; CHECK-NEXT: ldur x9, [x1, #7]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB40_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB40_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- ret i32 %m
-}
-
-define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length15_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB41_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldur x8, [x0, #7]
-; CHECK-NEXT: ldur x9, [x1, #7]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB41_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB41_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length15_const(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length15_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #14136 // =0x3738
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: movk x8, #13622, lsl #16
-; CHECK-NEXT: movk x8, #13108, lsl #32
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: movk x8, #12594, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB42_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: mov x8, #13365 // =0x3435
-; CHECK-NEXT: ldur x9, [x0, #7]
-; CHECK-NEXT: movk x8, #12851, lsl #16
-; CHECK-NEXT: movk x8, #12337, lsl #32
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: movk x8, #14393, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB42_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB42_3: // %res_block
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
- ret i32 %m
-}
-
-define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length15_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: ldur x10, [x0, #7]
-; CHECK-NEXT: ldur x11, [x1, #7]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length15_gt_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #14136 // =0x3738
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: movk x8, #13622, lsl #16
-; CHECK-NEXT: movk x8, #13108, lsl #32
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: movk x8, #12594, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB44_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: mov x8, #13365 // =0x3435
-; CHECK-NEXT: ldur x9, [x0, #7]
-; CHECK-NEXT: movk x8, #12851, lsl #16
-; CHECK-NEXT: movk x8, #12337, lsl #32
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: movk x8, #14393, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB44_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB44_4
-; CHECK-NEXT: .LBB44_3: // %res_block
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB44_4: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-
-define i32 @length16(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB45_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB45_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB45_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length16_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length16_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB47_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB47_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB47_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length16_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_3
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB48_4
-; CHECK-NEXT: .LBB48_3: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB48_4: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length16_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x8, #14648 // =0x3938
-; CHECK-NEXT: movk x8, #12592, lsl #16
-; CHECK-NEXT: movk x8, #13106, lsl #32
-; CHECK-NEXT: movk x8, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x8, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-
-define i32 @length24(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length24:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
-; CHECK-NEXT: // %bb.3:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB50_4: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length24_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldr x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x13, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length24_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB52_4
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB52_4
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB52_4
-; CHECK-NEXT: // %bb.3:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB52_4: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length24_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_4
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_4
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_4
-; CHECK-NEXT: // %bb.3:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB53_5
-; CHECK-NEXT: .LBB53_4: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB53_5: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length24_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: ldr x11, [x0, #16]
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x8, #14648 // =0x3938
-; CHECK-NEXT: movk x8, #12592, lsl #16
-; CHECK-NEXT: movk x8, #13106, lsl #32
-; CHECK-NEXT: movk x8, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x8, #0, eq
-; CHECK-NEXT: mov x8, #14134 // =0x3736
-; CHECK-NEXT: movk x8, #14648, lsl #16
-; CHECK-NEXT: movk x8, #12592, lsl #32
-; CHECK-NEXT: movk x8, #13106, lsl #48
-; CHECK-NEXT: ccmp x11, x8, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length31(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length31:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldur x8, [x0, #23]
-; CHECK-NEXT: ldur x9, [x1, #23]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
-; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB55_5: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
- ret i32 %m
-}
-
-define i1 @length31_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length31_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldr x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldur x8, [x0, #23]
-; CHECK-NEXT: ldur x9, [x1, #23]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x13, #0, eq
-; CHECK-NEXT: ccmp x8, x9, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length31_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB57_5
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB57_5
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB57_5
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldur x8, [x0, #23]
-; CHECK-NEXT: ldur x9, [x1, #23]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB57_5
-; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB57_5: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length31_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB58_5
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB58_5
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB58_5
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldur x8, [x0, #23]
-; CHECK-NEXT: ldur x9, [x1, #23]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB58_5
-; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB58_6
-; CHECK-NEXT: .LBB58_5: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB58_6: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; CHECK-LABEL: length31_eq_prefer128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldr x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldur x8, [x0, #23]
-; CHECK-NEXT: ldur x9, [x1, #23]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x13, #0, eq
-; CHECK-NEXT: ccmp x8, x9, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length31_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: ldr x11, [x0, #16]
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x8, #14648 // =0x3938
-; CHECK-NEXT: ldur x9, [x0, #23]
-; CHECK-NEXT: movk x8, #12592, lsl #16
-; CHECK-NEXT: movk x8, #13106, lsl #32
-; CHECK-NEXT: movk x8, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x8, #0, eq
-; CHECK-NEXT: mov x8, #14134 // =0x3736
-; CHECK-NEXT: movk x8, #14648, lsl #16
-; CHECK-NEXT: movk x8, #12592, lsl #32
-; CHECK-NEXT: movk x8, #13106, lsl #48
-; CHECK-NEXT: ccmp x11, x8, #0, eq
-; CHECK-NEXT: mov x8, #13363 // =0x3433
-; CHECK-NEXT: movk x8, #13877, lsl #16
-; CHECK-NEXT: movk x8, #14391, lsl #32
-; CHECK-NEXT: movk x8, #12345, lsl #48
-; CHECK-NEXT: ccmp x9, x8, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
-; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB61_5: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
- ret i32 %m
-}
-
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length32_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length32_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB63_5
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB63_5
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB63_5
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB63_5
-; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB63_5: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length32_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB64_5
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB64_5
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB64_5
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB64_5
-; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB64_6
-; CHECK-NEXT: .LBB64_5: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB64_6: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; CHECK-LABEL: length32_eq_prefer128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length32_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x8, #14648 // =0x3938
-; CHECK-NEXT: movk x8, #12592, lsl #16
-; CHECK-NEXT: ldp x9, x11, [x0, #16]
-; CHECK-NEXT: movk x8, #13106, lsl #32
-; CHECK-NEXT: movk x8, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x8, #0, eq
-; CHECK-NEXT: mov x8, #14134 // =0x3736
-; CHECK-NEXT: movk x8, #14648, lsl #16
-; CHECK-NEXT: movk x8, #12592, lsl #32
-; CHECK-NEXT: movk x8, #13106, lsl #48
-; CHECK-NEXT: ccmp x9, x8, #0, eq
-; CHECK-NEXT: mov x8, #13620 // =0x3534
-; CHECK-NEXT: movk x8, #14134, lsl #16
-; CHECK-NEXT: movk x8, #14648, lsl #32
-; CHECK-NEXT: movk x8, #12592, lsl #48
-; CHECK-NEXT: ccmp x11, x8, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length48(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length48:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
-; CHECK-NEXT: // %bb.6:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB67_7: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
- ret i32 %m
-}
-
-define i1 @length48_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length48_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldp x8, x11, [x0, #32]
-; CHECK-NEXT: ldp x10, x12, [x1, #32]
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ccmp x8, x10, #0, eq
-; CHECK-NEXT: ccmp x11, x12, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length48_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB69_7
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB69_7
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB69_7
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB69_7
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB69_7
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB69_7
-; CHECK-NEXT: // %bb.6:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB69_7: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length48_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB70_7
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB70_7
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB70_7
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB70_7
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB70_7
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB70_7
-; CHECK-NEXT: // %bb.6:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB70_8
-; CHECK-NEXT: .LBB70_7: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB70_8: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; CHECK-LABEL: length48_eq_prefer128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldp x8, x11, [x0, #32]
-; CHECK-NEXT: ldp x10, x12, [x1, #32]
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ccmp x8, x10, #0, eq
-; CHECK-NEXT: ccmp x11, x12, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length48_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: ldp x11, x12, [x0, #16]
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x9, #14648 // =0x3938
-; CHECK-NEXT: movk x9, #12592, lsl #16
-; CHECK-NEXT: movk x9, #13106, lsl #32
-; CHECK-NEXT: movk x9, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x9, #0, eq
-; CHECK-NEXT: mov x9, #14134 // =0x3736
-; CHECK-NEXT: movk x9, #14648, lsl #16
-; CHECK-NEXT: movk x9, #12592, lsl #32
-; CHECK-NEXT: movk x9, #13106, lsl #48
-; CHECK-NEXT: ccmp x11, x9, #0, eq
-; CHECK-NEXT: mov x9, #13620 // =0x3534
-; CHECK-NEXT: movk x9, #14134, lsl #16
-; CHECK-NEXT: ldp x10, x11, [x0, #32]
-; CHECK-NEXT: movk x9, #14648, lsl #32
-; CHECK-NEXT: movk x9, #12592, lsl #48
-; CHECK-NEXT: ccmp x12, x9, #0, eq
-; CHECK-NEXT: mov x9, #13106 // =0x3332
-; CHECK-NEXT: movk x9, #13620, lsl #16
-; CHECK-NEXT: movk x9, #14134, lsl #32
-; CHECK-NEXT: movk x9, #14648, lsl #48
-; CHECK-NEXT: ccmp x10, x9, #0, eq
-; CHECK-NEXT: ccmp x11, x8, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length63(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length63:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.6: // %loadbb6
-; CHECK-NEXT: ldr x8, [x0, #48]
-; CHECK-NEXT: ldr x9, [x1, #48]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.7: // %loadbb7
-; CHECK-NEXT: ldur x8, [x0, #55]
-; CHECK-NEXT: ldur x9, [x1, #55]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
-; CHECK-NEXT: // %bb.8:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB73_9: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
- ret i32 %m
-}
-
-define i1 @length63_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length63_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldp x8, x11, [x0, #32]
-; CHECK-NEXT: ldp x10, x12, [x1, #32]
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ldr x9, [x0, #48]
-; CHECK-NEXT: ldr x13, [x1, #48]
-; CHECK-NEXT: ccmp x8, x10, #0, eq
-; CHECK-NEXT: ldur x8, [x0, #55]
-; CHECK-NEXT: ldur x10, [x1, #55]
-; CHECK-NEXT: ccmp x11, x12, #0, eq
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ccmp x8, x10, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length63_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.6: // %loadbb6
-; CHECK-NEXT: ldr x8, [x0, #48]
-; CHECK-NEXT: ldr x9, [x1, #48]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.7: // %loadbb7
-; CHECK-NEXT: ldur x8, [x0, #55]
-; CHECK-NEXT: ldur x9, [x1, #55]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB75_9
-; CHECK-NEXT: // %bb.8:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB75_9: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length63_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.6: // %loadbb6
-; CHECK-NEXT: ldr x8, [x0, #48]
-; CHECK-NEXT: ldr x9, [x1, #48]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.7: // %loadbb7
-; CHECK-NEXT: ldur x8, [x0, #55]
-; CHECK-NEXT: ldur x9, [x1, #55]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
-; CHECK-NEXT: // %bb.8:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB76_10
-; CHECK-NEXT: .LBB76_9: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB76_10: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length63_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: ldp x11, x12, [x0, #16]
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x9, #14648 // =0x3938
-; CHECK-NEXT: movk x9, #12592, lsl #16
-; CHECK-NEXT: movk x9, #13106, lsl #32
-; CHECK-NEXT: movk x9, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x9, #0, eq
-; CHECK-NEXT: mov x10, #14134 // =0x3736
-; CHECK-NEXT: movk x10, #14648, lsl #16
-; CHECK-NEXT: movk x10, #12592, lsl #32
-; CHECK-NEXT: movk x10, #13106, lsl #48
-; CHECK-NEXT: ccmp x11, x10, #0, eq
-; CHECK-NEXT: mov x10, #13620 // =0x3534
-; CHECK-NEXT: movk x10, #14134, lsl #16
-; CHECK-NEXT: ldp x11, x13, [x0, #32]
-; CHECK-NEXT: movk x10, #14648, lsl #32
-; CHECK-NEXT: movk x10, #12592, lsl #48
-; CHECK-NEXT: ccmp x12, x10, #0, eq
-; CHECK-NEXT: mov x10, #13106 // =0x3332
-; CHECK-NEXT: ldr x12, [x0, #48]
-; CHECK-NEXT: movk x10, #13620, lsl #16
-; CHECK-NEXT: movk x10, #14134, lsl #32
-; CHECK-NEXT: movk x10, #14648, lsl #48
-; CHECK-NEXT: ccmp x11, x10, #0, eq
-; CHECK-NEXT: ldur x10, [x0, #55]
-; CHECK-NEXT: ccmp x13, x8, #0, eq
-; CHECK-NEXT: mov x8, #13877 // =0x3635
-; CHECK-NEXT: movk x8, #14391, lsl #16
-; CHECK-NEXT: ccmp x12, x9, #0, eq
-; CHECK-NEXT: movk x8, #12345, lsl #32
-; CHECK-NEXT: movk x8, #12849, lsl #48
-; CHECK-NEXT: ccmp x10, x8, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.6: // %loadbb6
-; CHECK-NEXT: ldr x8, [x0, #48]
-; CHECK-NEXT: ldr x9, [x1, #48]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.7: // %loadbb7
-; CHECK-NEXT: ldr x8, [x0, #56]
-; CHECK-NEXT: ldr x9, [x1, #56]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
-; CHECK-NEXT: // %bb.8:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB78_9: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w0, w8, hs
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length64_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x11, [x1]
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: ldp x12, x13, [x1, #16]
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldp x8, x11, [x0, #32]
-; CHECK-NEXT: ldp x10, x12, [x1, #32]
-; CHECK-NEXT: ccmp x9, x13, #0, eq
-; CHECK-NEXT: ldp x9, x13, [x1, #48]
-; CHECK-NEXT: ccmp x8, x10, #0, eq
-; CHECK-NEXT: ldp x8, x10, [x0, #48]
-; CHECK-NEXT: ccmp x11, x12, #0, eq
-; CHECK-NEXT: ccmp x8, x9, #0, eq
-; CHECK-NEXT: ccmp x10, x13, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length64_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.6: // %loadbb6
-; CHECK-NEXT: ldr x8, [x0, #48]
-; CHECK-NEXT: ldr x9, [x1, #48]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.7: // %loadbb7
-; CHECK-NEXT: ldr x8, [x0, #56]
-; CHECK-NEXT: ldr x9, [x1, #56]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB80_9
-; CHECK-NEXT: // %bb.8:
-; CHECK-NEXT: lsr w0, wzr, #31
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB80_9: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: lsr w0, w8, #31
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length64_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.1: // %loadbb1
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr x9, [x1, #8]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.2: // %loadbb2
-; CHECK-NEXT: ldr x8, [x0, #16]
-; CHECK-NEXT: ldr x9, [x1, #16]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.3: // %loadbb3
-; CHECK-NEXT: ldr x8, [x0, #24]
-; CHECK-NEXT: ldr x9, [x1, #24]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.4: // %loadbb4
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ldr x9, [x1, #32]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.5: // %loadbb5
-; CHECK-NEXT: ldr x8, [x0, #40]
-; CHECK-NEXT: ldr x9, [x1, #40]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.6: // %loadbb6
-; CHECK-NEXT: ldr x8, [x0, #48]
-; CHECK-NEXT: ldr x9, [x1, #48]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.7: // %loadbb7
-; CHECK-NEXT: ldr x8, [x0, #56]
-; CHECK-NEXT: ldr x9, [x1, #56]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: rev x9, x9
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB81_9
-; CHECK-NEXT: // %bb.8:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB81_10
-; CHECK-NEXT: .LBB81_9: // %res_block
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB81_10: // %endblock
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length64_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #12592 // =0x3130
-; CHECK-NEXT: ldp x9, x10, [x0]
-; CHECK-NEXT: movk x8, #13106, lsl #16
-; CHECK-NEXT: ldp x11, x12, [x0, #16]
-; CHECK-NEXT: movk x8, #13620, lsl #32
-; CHECK-NEXT: ldp x13, x14, [x0, #32]
-; CHECK-NEXT: movk x8, #14134, lsl #48
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: mov x9, #14648 // =0x3938
-; CHECK-NEXT: movk x9, #12592, lsl #16
-; CHECK-NEXT: movk x9, #13106, lsl #32
-; CHECK-NEXT: movk x9, #13620, lsl #48
-; CHECK-NEXT: ccmp x10, x9, #0, eq
-; CHECK-NEXT: mov x10, #14134 // =0x3736
-; CHECK-NEXT: movk x10, #14648, lsl #16
-; CHECK-NEXT: movk x10, #12592, lsl #32
-; CHECK-NEXT: movk x10, #13106, lsl #48
-; CHECK-NEXT: ccmp x11, x10, #0, eq
-; CHECK-NEXT: mov x11, #13620 // =0x3534
-; CHECK-NEXT: movk x11, #14134, lsl #16
-; CHECK-NEXT: movk x11, #14648, lsl #32
-; CHECK-NEXT: movk x11, #12592, lsl #48
-; CHECK-NEXT: ccmp x12, x11, #0, eq
-; CHECK-NEXT: mov x11, #13106 // =0x3332
-; CHECK-NEXT: movk x11, #13620, lsl #16
-; CHECK-NEXT: movk x11, #14134, lsl #32
-; CHECK-NEXT: movk x11, #14648, lsl #48
-; CHECK-NEXT: ccmp x13, x11, #0, eq
-; CHECK-NEXT: ldp x11, x12, [x0, #48]
-; CHECK-NEXT: ccmp x14, x8, #0, eq
-; CHECK-NEXT: ccmp x11, x9, #0, eq
-; CHECK-NEXT: ccmp x12, x10, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length96(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length96:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #96 // =0x60
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
- ret i32 %m
-}
-
-define i1 @length96_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length96_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #96 // =0x60
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length96_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #96 // =0x60
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length96_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #96 // =0x60
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length96_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #96 // =0x60
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length127(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length127:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #127 // =0x7f
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
- ret i32 %m
-}
-
-define i1 @length127_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length127_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #127 // =0x7f
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length127_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #127 // =0x7f
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length127_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #127 // =0x7f
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length127_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #127 // =0x7f
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length128(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #128 // =0x80
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
- ret i32 %m
-}
-
-define i1 @length128_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length128_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #128 // =0x80
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length128_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #128 // =0x80
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length128_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #128 // =0x80
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length128_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #128 // =0x80
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length192(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length192:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #192 // =0xc0
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
- ret i32 %m
-}
-
-define i1 @length192_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length192_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #192 // =0xc0
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length192_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #192 // =0xc0
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length192_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #192 // =0xc0
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length192_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #192 // =0xc0
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length255(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length255:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #255 // =0xff
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
- ret i32 %m
-}
-
-define i1 @length255_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length255_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #255 // =0xff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length255_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #255 // =0xff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length255_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #255 // =0xff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length255_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #255 // =0xff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length256(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length256:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #256 // =0x100
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
- ret i32 %m
-}
-
-define i1 @length256_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length256_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #256 // =0x100
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length256_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #256 // =0x100
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length256_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #256 // =0x100
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length256_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #256 // =0x100
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length384(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length384:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #384 // =0x180
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind
- ret i32 %m
-}
-
-define i1 @length384_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length384_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #384 // =0x180
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length384_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #384 // =0x180
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length384_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #384 // =0x180
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length384_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #384 // =0x180
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length511(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length511:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #511 // =0x1ff
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind
- ret i32 %m
-}
-
-define i1 @length511_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length511_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #511 // =0x1ff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length511_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #511 // =0x1ff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length511_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #511 // =0x1ff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length511_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #511 // =0x1ff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length512(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length512:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w2, #512 // =0x200
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind
- ret i32 %m
-}
-
-define i1 @length512_eq(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length512_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #512 // =0x200
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_lt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length512_lt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #512 // =0x200
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: lsr w0, w0, #31
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_gt(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: length512_gt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov w2, #512 // =0x200
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, gt
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_eq_const(ptr %X) nounwind {
-; CHECK-LABEL: length512_eq_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: adrp x1, .L.str
-; CHECK-NEXT: add x1, x1, :lo12:.L.str
-; CHECK-NEXT: mov w2, #512 // =0x200
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @huge_length(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: huge_length:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
- ret i32 %m
-}
-
-define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: huge_length_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind {
-; CHECK-LABEL: nonconst_length:
-; CHECK: // %bb.0:
-; CHECK-NEXT: b memcmp
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
- ret i32 %m
-}
-
-define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind {
-; CHECK-LABEL: nonconst_length_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 8b0b6263832243..84210ec410d29f 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -203,13 +203,6 @@
; GCN-O1-NEXT: Canonicalize Freeze Instructions in Loops
; GCN-O1-NEXT: Induction Variable Users
; GCN-O1-NEXT: Loop Strength Reduction
-; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
-; GCN-O1-NEXT: Function Alias Analysis Results
-; GCN-O1-NEXT: Merge contiguous icmps into a memcmp
-; GCN-O1-NEXT: Natural Loop Information
-; GCN-O1-NEXT: Lazy Branch Probability Analysis
-; GCN-O1-NEXT: Lazy Block Frequency Analysis
-; GCN-O1-NEXT: Expand memcmp() to load/stores
; GCN-O1-NEXT: Lower constant intrinsics
; GCN-O1-NEXT: Remove unreachable blocks from the CFG
; GCN-O1-NEXT: Natural Loop Information
@@ -484,13 +477,6 @@
; GCN-O1-OPTS-NEXT: Canonicalize Freeze Instructions in Loops
; GCN-O1-OPTS-NEXT: Induction Variable Users
; GCN-O1-OPTS-NEXT: Loop Strength Reduction
-; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl)
-; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
-; GCN-O1-OPTS-NEXT: Merge contiguous icmps into a memcmp
-; GCN-O1-OPTS-NEXT: Natural Loop Information
-; GCN-O1-OPTS-NEXT: Lazy Branch Probability Analysis
-; GCN-O1-OPTS-NEXT: Lazy Block Frequency Analysis
-; GCN-O1-OPTS-NEXT: Expand memcmp() to load/stores
; GCN-O1-OPTS-NEXT: Lower constant intrinsics
; GCN-O1-OPTS-NEXT: Remove unreachable blocks from the CFG
; GCN-O1-OPTS-NEXT: Natural Loop Information
@@ -784,13 +770,6 @@
; GCN-O2-NEXT: Canonicalize Freeze Instructions in Loops
; GCN-O2-NEXT: Induction Variable Users
; GCN-O2-NEXT: Loop Strength Reduction
-; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl)
-; GCN-O2-NEXT: Function Alias Analysis Results
-; GCN-O2-NEXT: Merge contiguous icmps into a memcmp
-; GCN-O2-NEXT: Natural Loop Information
-; GCN-O2-NEXT: Lazy Branch Probability Analysis
-; GCN-O2-NEXT: Lazy Block Frequency Analysis
-; GCN-O2-NEXT: Expand memcmp() to load/stores
; GCN-O2-NEXT: Lower constant intrinsics
; GCN-O2-NEXT: Remove unreachable blocks from the CFG
; GCN-O2-NEXT: Natural Loop Information
@@ -1092,13 +1071,6 @@
; GCN-O3-NEXT: Canonicalize Freeze Instructions in Loops
; GCN-O3-NEXT: Induction Variable Users
; GCN-O3-NEXT: Loop Strength Reduction
-; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
-; GCN-O3-NEXT: Function Alias Analysis Results
-; GCN-O3-NEXT: Merge contiguous icmps into a memcmp
-; GCN-O3-NEXT: Natural Loop Information
-; GCN-O3-NEXT: Lazy Branch Probability Analysis
-; GCN-O3-NEXT: Lazy Block Frequency Analysis
-; GCN-O3-NEXT: Expand memcmp() to load/stores
; GCN-O3-NEXT: Lower constant intrinsics
; GCN-O3-NEXT: Remove unreachable blocks from the CFG
; GCN-O3-NEXT: Natural Loop Information
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 5e565970fc3a86..f2bef2c7e46acc 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -21,13 +21,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
diff --git a/llvm/test/CodeGen/BPF/memcmp.ll b/llvm/test/CodeGen/BPF/memcmp.ll
deleted file mode 100644
index 7ed8dc1e736f4c..00000000000000
--- a/llvm/test/CodeGen/BPF/memcmp.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: llc -march=bpfel < %s | FileCheck %s
-; RUN: llc -march=bpfel -mcpu=v3 < %s | FileCheck %s
-;
-; Source code:
-; /* set aligned 4 to minimize the number of loads */
-; struct build_id {
-; unsigned char id[20];
-; } __attribute__((aligned(4)));
-;
-; /* try to compute a local build_id */
-; void bar1(ptr);
-;
-; /* the global build_id to compare */
-; struct build_id id2;
-;
-; int foo()
-; {
-; struct build_id id1;
-;
-; bar1(&id1);
-; return __builtin_memcmp(&id1, &id2, sizeof(id1)) == 0;
-; }
-; Compilation flags:
-; clang -target bpf -S -O2 t.c -emit-llvm
-
-
-%struct.build_id = type { [20 x i8] }
-
-@id2 = dso_local global %struct.build_id zeroinitializer, align 4
-
-; Function Attrs: nounwind
-define dso_local i32 @foo() local_unnamed_addr #0 {
-entry:
- %id11 = alloca [20 x i8], align 4
- call void @llvm.lifetime.start.p0(i64 20, ptr nonnull %id11) #4
- call void @bar1(ptr noundef nonnull %id11) #4
- %call = call i32 @memcmp(ptr noundef nonnull dereferenceable(20) %id11, ptr noundef nonnull dereferenceable(20) @id2, i64 noundef 20) #4
- %cmp = icmp eq i32 %call, 0
- %conv = zext i1 %cmp to i32
- call void @llvm.lifetime.end.p0(i64 20, ptr nonnull %id11) #4
- ret i32 %conv
-}
-
-; CHECK-DAG: *(u32 *)(r1 + 0)
-; CHECK-DAG: *(u32 *)(r1 + 4)
-; CHECK-DAG: *(u32 *)(r10 - 16)
-; CHECK-DAG: *(u32 *)(r10 - 20)
-; CHECK-DAG: *(u32 *)(r10 - 8)
-; CHECK-DAG: *(u32 *)(r10 - 12)
-; CHECK-DAG: *(u32 *)(r1 + 8)
-; CHECK-DAG: *(u32 *)(r1 + 12)
-; CHECK-DAG: *(u32 *)(r2 + 16)
-; CHECK-DAG: *(u32 *)(r10 - 4)
-
-; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
-
-declare dso_local void @bar1(ptr noundef) local_unnamed_addr #2
-
-; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn
-declare dso_local i32 @memcmp(ptr nocapture noundef, ptr nocapture noundef, i64 noundef) local_unnamed_addr #3
-
-; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
-
-attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
-attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #3 = { argmemonly mustprogress nofree nounwind readonly willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #4 = { nounwind }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"frame-pointer", i32 2}
-!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git dea65874b2505f8f5e8e51fd8cad6908feb375ec)"}
diff --git a/llvm/test/CodeGen/Generic/llc-start-stop.ll b/llvm/test/CodeGen/Generic/llc-start-stop.ll
index b02472473a00cb..9ada245835981b 100644
--- a/llvm/test/CodeGen/Generic/llc-start-stop.ll
+++ b/llvm/test/CodeGen/Generic/llc-start-stop.ll
@@ -19,15 +19,15 @@
; STOP-BEFORE-NOT: Loop Strength Reduction
; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER
-; START-AFTER: -aa -mergeicmps
+; START-AFTER: -gc-lowering
; START-AFTER: FunctionPass Manager
-; START-AFTER-NEXT: Dominator Tree Construction
+; START-AFTER-NEXT: Lower Garbage Collection Instructions
; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE
; START-BEFORE: -machine-branch-prob -regalloc-evict -regalloc-priority -domtree
; START-BEFORE: FunctionPass Manager
; START-BEFORE: Loop Strength Reduction
-; START-BEFORE-NEXT: Basic Alias Analysis (stateless AA impl)
+; START-BEFORE-NEXT: Lower Garbage Collection Instructions
; RUN: not --crash llc < %s -start-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-START-BEFORE
; RUN: not --crash llc < %s -stop-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-STOP-BEFORE
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 3134d940545e80..696d8c8be017cb 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -23,8 +23,8 @@
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
-; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
+; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
@@ -44,13 +44,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
diff --git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll
index dfaa149b7a4744..ad053cf4d61a07 100644
--- a/llvm/test/CodeGen/M68k/pipeline.ll
+++ b/llvm/test/CodeGen/M68k/pipeline.ll
@@ -15,13 +15,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index 6ce4416211cc4d..1fdb4802eff036 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -11,8 +11,8 @@
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
-; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
+; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
@@ -45,13 +45,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
deleted file mode 100644
index 1da40d46aa7730..00000000000000
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ /dev/null
@@ -1,168 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux-gnu"
-
-@zeroEqualityTest01.buffer1 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 4], align 4
-@zeroEqualityTest01.buffer2 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 3], align 4
-@zeroEqualityTest02.buffer1 = private unnamed_addr constant [4 x i32] [i32 4, i32 0, i32 0, i32 0], align 4
-@zeroEqualityTest02.buffer2 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 0, i32 0], align 4
-@zeroEqualityTest03.buffer1 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 3], align 4
-@zeroEqualityTest03.buffer2 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 4], align 4
-@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4
-@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4
-
-declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1
-
-; Check 4 bytes - requires 1 load for each param.
-define signext i32 @zeroEqualityTest02(ptr %x, ptr %y) {
-; CHECK-LABEL: zeroEqualityTest02:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: lwz 4, 0(4)
-; CHECK-NEXT: xor 3, 3, 4
-; CHECK-NEXT: cntlzw 3, 3
-; CHECK-NEXT: srwi 3, 3, 5
-; CHECK-NEXT: xori 3, 3, 1
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 4)
- %not.cmp = icmp ne i32 %call, 0
- %. = zext i1 %not.cmp to i32
- ret i32 %.
-}
-
-; Check 16 bytes - requires 2 loads for each param (or use vectors?).
-define signext i32 @zeroEqualityTest01(ptr %x, ptr %y) {
-; CHECK-LABEL: zeroEqualityTest01:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld 5, 0(3)
-; CHECK-NEXT: ld 6, 0(4)
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: bne 0, .LBB1_2
-; CHECK-NEXT: # %bb.1: # %loadbb1
-; CHECK-NEXT: ld 5, 8(3)
-; CHECK-NEXT: ld 4, 8(4)
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: cmpld 5, 4
-; CHECK-NEXT: beqlr 0
-; CHECK-NEXT: .LBB1_2: # %res_block
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 16)
- %not.tobool = icmp ne i32 %call, 0
- %. = zext i1 %not.tobool to i32
- ret i32 %.
-}
-
-; Check 7 bytes - requires 3 loads for each param.
-define signext i32 @zeroEqualityTest03(ptr %x, ptr %y) {
-; CHECK-LABEL: zeroEqualityTest03:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lwz 5, 0(3)
-; CHECK-NEXT: lwz 6, 0(4)
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: bne 0, .LBB2_3
-; CHECK-NEXT: # %bb.1: # %loadbb1
-; CHECK-NEXT: lhz 5, 4(3)
-; CHECK-NEXT: lhz 6, 4(4)
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: bne 0, .LBB2_3
-; CHECK-NEXT: # %bb.2: # %loadbb2
-; CHECK-NEXT: lbz 5, 6(3)
-; CHECK-NEXT: lbz 4, 6(4)
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: cmplw 5, 4
-; CHECK-NEXT: beqlr 0
-; CHECK-NEXT: .LBB2_3: # %res_block
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 7)
- %not.lnot = icmp ne i32 %call, 0
- %cond = zext i1 %not.lnot to i32
- ret i32 %cond
-}
-
-; Validate with > 0
-define signext i32 @zeroEqualityTest04() {
-; CHECK-LABEL: zeroEqualityTest04:
-; CHECK: # %bb.0: # %loadbb
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest02.buffer1, ptr @zeroEqualityTest02.buffer2, i64 16)
- %not.cmp = icmp slt i32 %call, 1
- %. = zext i1 %not.cmp to i32
- ret i32 %.
-}
-
-; Validate with < 0
-define signext i32 @zeroEqualityTest05() {
-; CHECK-LABEL: zeroEqualityTest05:
-; CHECK: # %bb.0: # %loadbb
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest03.buffer1, ptr @zeroEqualityTest03.buffer2, i64 16)
- %call.lobit = lshr i32 %call, 31
- %call.lobit.not = xor i32 %call.lobit, 1
- ret i32 %call.lobit.not
-}
-
-; Validate with memcmp()?:
-define signext i32 @equalityFoldTwoConstants() {
-; CHECK-LABEL: equalityFoldTwoConstants:
-; CHECK: # %bb.0: # %loadbb
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr @zeroEqualityTest04.buffer2, i64 16)
- %not.tobool = icmp eq i32 %call, 0
- %cond = zext i1 %not.tobool to i32
- ret i32 %cond
-}
-
-define signext i32 @equalityFoldOneConstant(ptr %X) {
-; CHECK-LABEL: equalityFoldOneConstant:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: ld 4, 0(3)
-; CHECK-NEXT: rldic 5, 5, 32, 31
-; CHECK-NEXT: cmpld 4, 5
-; CHECK-NEXT: bne 0, .LBB6_2
-; CHECK-NEXT: # %bb.1: # %loadbb1
-; CHECK-NEXT: lis 5, -32768
-; CHECK-NEXT: ld 4, 8(3)
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: ori 5, 5, 1
-; CHECK-NEXT: rldic 5, 5, 1, 30
-; CHECK-NEXT: cmpld 4, 5
-; CHECK-NEXT: beq 0, .LBB6_3
-; CHECK-NEXT: .LBB6_2: # %res_block
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: .LBB6_3: # %endblock
-; CHECK-NEXT: cntlzw 3, 3
-; CHECK-NEXT: srwi 3, 3, 5
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr %X, i64 16)
- %not.tobool = icmp eq i32 %call, 0
- %cond = zext i1 %not.tobool to i32
- ret i32 %cond
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
-; CHECK-LABEL: length2_eq_nobuiltin_attr:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: stdu 1, -32(1)
-; CHECK-NEXT: li 5, 2
-; CHECK-NEXT: std 0, 48(1)
-; CHECK-NEXT: bl memcmp
-; CHECK-NEXT: nop
-; CHECK-NEXT: cntlzw 3, 3
-; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
-; CHECK-NEXT: addi 1, 1, 32
-; CHECK-NEXT: ld 0, 16(1)
-; CHECK-NEXT: mtlr 0
-; CHECK-NEXT: blr
- %m = tail call signext i32 @memcmp(ptr %X, ptr %Y, i64 2) nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
deleted file mode 100644
index 29910646c89371..00000000000000
--- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=PPC64LE
-
-; This tests interaction between MergeICmp and expand-memcmp.
-
-%"struct.std::pair" = type { i32, i32 }
-
-define zeroext i1 @opeq1(
-; PPC64LE-LABEL: opeq1:
-; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
-; PPC64LE-NEXT: ld 3, 0(3)
-; PPC64LE-NEXT: ld 4, 0(4)
-; PPC64LE-NEXT: cmpd 3, 4
-; PPC64LE-NEXT: li 3, 0
-; PPC64LE-NEXT: li 4, 1
-; PPC64LE-NEXT: iseleq 3, 4, 3
-; PPC64LE-NEXT: blr
- ptr nocapture readonly dereferenceable(8) %a,
- ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
-entry:
- %0 = load i32, ptr %a, align 4
- %1 = load i32, ptr %b, align 4
- %cmp.i = icmp eq i32 %0, %1
- br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
-
-land.rhs.i:
- %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1
- %2 = load i32, ptr %second.i, align 4
- %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1
- %3 = load i32, ptr %second2.i, align 4
- %cmp3.i = icmp eq i32 %2, %3
- br label %opeq1.exit
-
-opeq1.exit:
- %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
- ret i1 %4
-}
-
-
diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll
deleted file mode 100644
index 0634534b9c9df1..00000000000000
--- a/llvm/test/CodeGen/PowerPC/memcmp.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
-
-define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
-; CHECK-LABEL: memcmp8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ldbrx 3, 0, 3
-; CHECK-NEXT: ldbrx 4, 0, 4
-; CHECK-NEXT: subc 5, 4, 3
-; CHECK-NEXT: subfe 5, 4, 4
-; CHECK-NEXT: subc 4, 3, 4
-; CHECK-NEXT: subfe 3, 3, 3
-; CHECK-NEXT: neg 5, 5
-; CHECK-NEXT: neg 3, 3
-; CHECK-NEXT: sub 3, 5, 3
-; CHECK-NEXT: extsw 3, 3
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
- ret i32 %call
-}
-
-define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
-; CHECK-LABEL: memcmp4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lwbrx 3, 0, 3
-; CHECK-NEXT: lwbrx 4, 0, 4
-; CHECK-NEXT: sub 5, 4, 3
-; CHECK-NEXT: sub 3, 3, 4
-; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: rldicl 3, 3, 1, 63
-; CHECK-NEXT: sub 3, 5, 3
-; CHECK-NEXT: extsw 3, 3
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
- ret i32 %call
-}
-
-define signext i32 @memcmp2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
-; CHECK-LABEL: memcmp2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhbrx 3, 0, 3
-; CHECK-NEXT: lhbrx 4, 0, 4
-; CHECK-NEXT: sub 3, 3, 4
-; CHECK-NEXT: extsw 3, 3
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 2)
- ret i32 %call
-}
-
-define signext i32 @memcmp1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
-; CHECK-LABEL: memcmp1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lbz 3, 0(3)
-; CHECK-NEXT: lbz 4, 0(4)
-; CHECK-NEXT: sub 3, 3, 4
-; CHECK-NEXT: extsw 3, 3
-; CHECK-NEXT: blr
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 1) #2
- ret i32 %call
-}
-
-declare signext i32 @memcmp(ptr, ptr, i64)
diff --git a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll
deleted file mode 100644
index 0a8bec7dc0e3f1..00000000000000
--- a/llvm/test/CodeGen/PowerPC/memcmpIR.ll
+++ /dev/null
@@ -1,178 +0,0 @@
-; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s
-; RUN: llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE
-
-define signext i32 @test1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
-entry:
- ; CHECK-LABEL: @test1(
- ; CHECK-LABEL: res_block:{{.*}}
- ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
- ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
- ; CHECK-NEXT: br label %endblock
-
- ; CHECK-LABEL: loadbb:{{.*}}
- ; CHECK: [[LOAD1:%[0-9]+]] = load i64, ptr
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr
- ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
- ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
-
- ; CHECK-LABEL: loadbb1:{{.*}}
- ; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8
- ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]]
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]]
- ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
- ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
-
- ; CHECK-BE-LABEL: @test1(
- ; CHECK-BE-LABEL: res_block:{{.*}}
- ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
- ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
- ; CHECK-BE-NEXT: br label %endblock
-
- ; CHECK-BE-LABEL: loadbb:{{.*}}
- ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
-
- ; CHECK-BE-LABEL: loadbb1:{{.*}}
- ; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8
- ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8
- ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]]
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
-
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16)
- ret i32 %call
-}
-
-declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1
-
-define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
- ; CHECK-LABEL: @test2(
- ; CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
- ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
- ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
- ; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
- ; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
- ; CHECK-NEXT: ret i32 [[SUB]]
-
- ; CHECK-BE-LABEL: @test2(
- ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
- ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
- ; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
- ; CHECK-BE-NEXT: ret i32 [[SUB]]
-
-entry:
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
- ret i32 %call
-}
-
-define signext i32 @test3(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
- ; CHECK-LABEL: res_block:{{.*}}
- ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
- ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
- ; CHECK-NEXT: br label %endblock
-
- ; CHECK-LABEL: loadbb:{{.*}}
- ; CHECK: [[LOAD1:%[0-9]+]] = load i64, ptr
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr
- ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
- ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
-
- ; CHECK-LABEL: loadbb1:{{.*}}
- ; CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
- ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
- ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
- ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
- ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block
-
- ; CHECK-LABEL: loadbb2:{{.*}}
- ; CHECK: [[LOAD1:%[0-9]+]] = load i16, ptr
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, ptr
- ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]])
- ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]])
- ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64
- ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block
-
- ; CHECK-LABEL: loadbb3:{{.*}}
- ; CHECK: [[LOAD1:%[0-9]+]] = load i8, ptr
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, ptr
- ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
- ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-NEXT: br label %endblock
-
- ; CHECK-BE-LABEL: res_block:{{.*}}
- ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
- ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
- ; CHECK-BE-NEXT: br label %endblock
-
- ; CHECK-BE-LABEL: loadbb:{{.*}}
- ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
-
- ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
- ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
- ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block
-
- ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, ptr
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, ptr
- ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64
- ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block
-
- ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, ptr
- ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, ptr
- ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
- ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-BE-NEXT: br label %endblock
-
-entry:
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 15)
- ret i32 %call
-}
- ; CHECK: call = tail call signext i32 @memcmp
- ; CHECK-BE: call = tail call signext i32 @memcmp
-define signext i32 @test4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
-
-entry:
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 65)
- ret i32 %call
-}
-
-define signext i32 @test5(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2, i32 signext %SIZE) {
- ; CHECK: call = tail call signext i32 @memcmp
- ; CHECK-BE: call = tail call signext i32 @memcmp
-entry:
- %conv = sext i32 %SIZE to i64
- %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 %conv)
- ret i32 %call
-}
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index e7db8ef9d5aff3..8b07c7015dcceb 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -45,13 +45,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
@@ -193,7 +186,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: RISC-V Zcmp move merging pass
-; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass
+; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass
; CHECK-NEXT: RISC-V pseudo instruction expansion pass
; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass
; CHECK-NEXT: Unpack machine instruction bundles
diff --git a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
deleted file mode 100644
index c16e2adb7a0783..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
-
-; This tests interaction between MergeICmp and ExpandMemCmp.
-
-%"struct.std::pair" = type { i32, i32 }
-
-define zeroext i1 @opeq1(
-; X86-LABEL: opeq1:
-; X86: # %bb.0: # %"entry+land.rhs.i"
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
-;
-; X64-LABEL: opeq1:
-; X64: # %bb.0: # %"entry+land.rhs.i"
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- ptr nocapture readonly dereferenceable(8) %a,
- ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
-entry:
- %0 = load i32, ptr %a, align 4
- %1 = load i32, ptr %b, align 4
- %cmp.i = icmp eq i32 %0, %1
- br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
-
-land.rhs.i:
- %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1
- %2 = load i32, ptr %second.i, align 4
- %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1
- %3 = load i32, ptr %second2.i, align 4
- %cmp3.i = icmp eq i32 %2, %3
- br label %opeq1.exit
-
-opeq1.exit:
- %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
- ret i1 %4
-}
-
-
diff --git a/llvm/test/CodeGen/X86/memcmp-minsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-minsize-x32.ll
deleted file mode 100644
index ae1320f8b0868b..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-minsize-x32.ll
+++ /dev/null
@@ -1,445 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i32)
-
-define i32 @length2(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length2:
-; X86: # %bb.0:
-; X86-NEXT: pushl $2
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length2_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: cmpw (%eax), %cx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind minsize {
-; X86-LABEL: length2_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw $12849, (%eax) # imm = 0x3231
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length2_eq_nobuiltin_attr:
-; X86: # %bb.0:
-; X86-NEXT: pushl $2
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length3:
-; X86: # %bb.0:
-; X86-NEXT: pushl $3
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length3_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $3
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length4:
-; X86: # %bb.0:
-; X86-NEXT: pushl $4
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length4_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: cmpl (%eax), %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind minsize {
-; X86-LABEL: length4_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length5:
-; X86: # %bb.0:
-; X86-NEXT: pushl $5
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length5_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $5
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length8:
-; X86: # %bb.0:
-; X86-NEXT: pushl $8
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length8_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $8
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind minsize {
-; X86-LABEL: length8_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $8
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length12_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length12:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- ret i32 %m
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length16:
-; X86: # %bb.0:
-; X86-NEXT: pushl $16
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize {
-; X86-NOSSE-LABEL: length16_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind minsize {
-; X86-NOSSE-LABEL: length16_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length24:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize {
-; X86-LABEL: length24_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind minsize {
-; X86-LABEL: length24_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length32:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize {
-; X86-LABEL: length32_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind minsize {
-; X86-LABEL: length32_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind minsize {
-; X86-LABEL: length64:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize {
-; X86-LABEL: length64_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind minsize {
-; X86-LABEL: length64_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
diff --git a/llvm/test/CodeGen/X86/memcmp-minsize.ll b/llvm/test/CodeGen/X86/memcmp-minsize.ll
deleted file mode 100644
index 544d1c49f26b99..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-minsize.ll
+++ /dev/null
@@ -1,433 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i64)
-
-define i32 @length2(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length2:
-; X64: # %bb.0:
-; X64-NEXT: pushq $2
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length2_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind minsize {
-; X64-LABEL: length2_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: cmpw $12849, (%rdi) # imm = 0x3231
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length2_eq_nobuiltin_attr:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $2
-; X64-NEXT: popq %rdx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length3:
-; X64: # %bb.0:
-; X64-NEXT: pushq $3
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length3_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $3
-; X64-NEXT: popq %rdx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length4:
-; X64: # %bb.0:
-; X64-NEXT: pushq $4
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length4_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind minsize {
-; X64-LABEL: length4_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length5:
-; X64: # %bb.0:
-; X64-NEXT: pushq $5
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length5_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $5
-; X64-NEXT: popq %rdx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length8:
-; X64: # %bb.0:
-; X64-NEXT: pushq $8
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length8_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind minsize {
-; X64-LABEL: length8_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length12_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $12
-; X64-NEXT: popq %rdx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length12:
-; X64: # %bb.0:
-; X64-NEXT: pushq $12
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- ret i32 %m
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind minsize {
-;
-; X64-LABEL: length16:
-; X64: # %bb.0:
-; X64-NEXT: pushq $16
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize {
-; X64-SSE2-LABEL: length16_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind minsize {
-; X64-SSE2-LABEL: length16_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length24:
-; X64: # %bb.0:
-; X64-NEXT: pushq $24
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize {
-; X64-LABEL: length24_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $24
-; X64-NEXT: popq %rdx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind minsize {
-; X64-LABEL: length24_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $24
-; X64-NEXT: popq %rdx
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length32:
-; X64: # %bb.0:
-; X64-NEXT: pushq $32
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize {
-; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: pushq $32
-; X64-SSE2-NEXT: popq %rdx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind minsize {
-; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: pushq $32
-; X64-SSE2-NEXT: popq %rdx
-; X64-SSE2-NEXT: movl $.L.str, %esi
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind minsize {
-; X64-LABEL: length64:
-; X64: # %bb.0:
-; X64-NEXT: pushq $64
-; X64-NEXT: popq %rdx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize {
-; X64-LABEL: length64_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $64
-; X64-NEXT: popq %rdx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind minsize {
-; X64-LABEL: length64_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: pushq $64
-; X64-NEXT: popq %rdx
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
deleted file mode 100644
index 0253d131226083..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ /dev/null
@@ -1,2911 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way.
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
- at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i32)
-
-define i32 @length0(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length0:
-; X86: # %bb.0:
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
- ret i32 %m
- }
-
-define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length0_eq:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length0_lt:
-; X86: # %bb.0:
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length2(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: cmpw (%eax), %cx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_lt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_gt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind {
-; X86-LABEL: length2_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_eq_nobuiltin_attr:
-; X86: # %bb.0:
-; X86-NEXT: pushl $2
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length3:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzwl (%ecx), %esi
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: rolw $8, %si
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB9_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 2(%eax), %eax
-; X86-NEXT: movzbl 2(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-; X86-NEXT: .LBB9_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length3_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: xorw (%eax), %dx
-; X86-NEXT: movzbl 2(%ecx), %ecx
-; X86-NEXT: xorb 2(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orw %dx, %ax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: cmpl (%eax), %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4_lt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: setb %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4_gt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind {
-; X86-LABEL: length4_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length5:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: bswapl %edx
-; X86-NEXT: bswapl %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB16_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 4(%eax), %eax
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-; X86-NEXT: .LBB16_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length5_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: xorb 4(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length5_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: bswapl %edx
-; X86-NEXT: bswapl %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB18_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 4(%eax), %eax
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB18_2
-; X86-NEXT: .LBB18_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB18_2: # %endblock
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length7(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length7:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB19_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 3(%esi), %ecx
-; X86-NEXT: movl 3(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB19_3
-; X86-NEXT: .LBB19_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB19_3: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
- ret i32 %m
-}
-
-define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length7_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 3(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 3(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length7_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB21_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 3(%esi), %ecx
-; X86-NEXT: movl 3(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB21_3
-; X86-NEXT: .LBB21_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB21_3: # %endblock
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length8:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB22_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB22_3
-; X86-NEXT: .LBB22_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB22_3: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length8_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind {
-; X86-LABEL: length8_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
-; X86-NEXT: xorl (%eax), %ecx
-; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
-; X86-NEXT: xorl 4(%eax), %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length9_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: movzbl 8(%ecx), %ecx
-; X86-NEXT: xorb 8(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length10_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: movzwl 8(%ecx), %ecx
-; X86-NEXT: xorw 8(%eax), %cx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length11_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: movl 7(%ecx), %ecx
-; X86-NEXT: xorl 7(%eax), %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length12_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: movl 8(%ecx), %ecx
-; X86-NEXT: xorl 8(%eax), %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length12:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB29_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB29_3
-; X86-NEXT: # %bb.2: # %loadbb2
-; X86-NEXT: movl 8(%esi), %ecx
-; X86-NEXT: movl 8(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB29_4
-; X86-NEXT: .LBB29_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB29_4: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- ret i32 %m
-}
-
-define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length13_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl (%edx), %esi
-; X86-NEXT: movl 4(%edx), %eax
-; X86-NEXT: xorl (%ecx), %esi
-; X86-NEXT: xorl 4(%ecx), %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl 8(%edx), %esi
-; X86-NEXT: xorl 8(%ecx), %esi
-; X86-NEXT: movzbl 12(%edx), %edx
-; X86-NEXT: xorb 12(%ecx), %dl
-; X86-NEXT: movzbl %dl, %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length14_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl (%edx), %esi
-; X86-NEXT: movl 4(%edx), %eax
-; X86-NEXT: xorl (%ecx), %esi
-; X86-NEXT: xorl 4(%ecx), %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl 8(%edx), %esi
-; X86-NEXT: xorl 8(%ecx), %esi
-; X86-NEXT: movzwl 12(%edx), %edx
-; X86-NEXT: xorw 12(%ecx), %dx
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length15_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl (%edx), %esi
-; X86-NEXT: movl 4(%edx), %eax
-; X86-NEXT: xorl (%ecx), %esi
-; X86-NEXT: xorl 4(%ecx), %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl 8(%edx), %esi
-; X86-NEXT: xorl 8(%ecx), %esi
-; X86-NEXT: movl 11(%edx), %edx
-; X86-NEXT: xorl 11(%ecx), %edx
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length16:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB33_4
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB33_4
-; X86-NEXT: # %bb.2: # %loadbb2
-; X86-NEXT: movl 8(%esi), %ecx
-; X86-NEXT: movl 8(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB33_4
-; X86-NEXT: # %bb.3: # %loadbb3
-; X86-NEXT: movl 12(%esi), %ecx
-; X86-NEXT: movl 12(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB33_5
-; X86-NEXT: .LBB33_4: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB33_5: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length16_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl (%edx), %esi
-; X86-NOSSE-NEXT: movl 4(%edx), %eax
-; X86-NOSSE-NEXT: xorl (%ecx), %esi
-; X86-NOSSE-NEXT: xorl 4(%ecx), %eax
-; X86-NOSSE-NEXT: orl %esi, %eax
-; X86-NOSSE-NEXT: movl 8(%edx), %esi
-; X86-NOSSE-NEXT: xorl 8(%ecx), %esi
-; X86-NOSSE-NEXT: movl 12(%edx), %edx
-; X86-NOSSE-NEXT: xorl 12(%ecx), %edx
-; X86-NOSSE-NEXT: orl %esi, %edx
-; X86-NOSSE-NEXT: orl %eax, %edx
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length16_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl %esi
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE1-NEXT: movl (%edx), %esi
-; X86-SSE1-NEXT: movl 4(%edx), %eax
-; X86-SSE1-NEXT: xorl (%ecx), %esi
-; X86-SSE1-NEXT: xorl 4(%ecx), %eax
-; X86-SSE1-NEXT: orl %esi, %eax
-; X86-SSE1-NEXT: movl 8(%edx), %esi
-; X86-SSE1-NEXT: xorl 8(%ecx), %esi
-; X86-SSE1-NEXT: movl 12(%edx), %edx
-; X86-SSE1-NEXT: xorl 12(%ecx), %edx
-; X86-SSE1-NEXT: orl %esi, %edx
-; X86-SSE1-NEXT: orl %eax, %edx
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: popl %esi
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length16_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu (%eax), %xmm1
-; X86-SSE41-NEXT: pxor %xmm0, %xmm1
-; X86-SSE41-NEXT: ptest %xmm1, %xmm1
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length16_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB35_4
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB35_4
-; X86-NEXT: # %bb.2: # %loadbb2
-; X86-NEXT: movl 8(%esi), %ecx
-; X86-NEXT: movl 8(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB35_4
-; X86-NEXT: # %bb.3: # %loadbb3
-; X86-NEXT: movl 12(%esi), %ecx
-; X86-NEXT: movl 12(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB35_5
-; X86-NEXT: .LBB35_4: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB35_5: # %endblock
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length16_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: movl (%edx), %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: jne .LBB36_4
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %eax
-; X86-NEXT: movl 4(%edx), %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: jne .LBB36_4
-; X86-NEXT: # %bb.2: # %loadbb2
-; X86-NEXT: movl 8(%esi), %eax
-; X86-NEXT: movl 8(%edx), %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: jne .LBB36_4
-; X86-NEXT: # %bb.3: # %loadbb3
-; X86-NEXT: movl 12(%esi), %eax
-; X86-NEXT: movl 12(%edx), %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: je .LBB36_5
-; X86-NEXT: .LBB36_4: # %res_block
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl $1, %edx
-; X86-NEXT: .LBB36_5: # %endblock
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: setg %al
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length16_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl $858927408, %ecx # imm = 0x33323130
-; X86-NOSSE-NEXT: xorl (%eax), %ecx
-; X86-NOSSE-NEXT: movl $926299444, %edx # imm = 0x37363534
-; X86-NOSSE-NEXT: xorl 4(%eax), %edx
-; X86-NOSSE-NEXT: orl %ecx, %edx
-; X86-NOSSE-NEXT: movl $825243960, %ecx # imm = 0x31303938
-; X86-NOSSE-NEXT: xorl 8(%eax), %ecx
-; X86-NOSSE-NEXT: movl $892613426, %esi # imm = 0x35343332
-; X86-NOSSE-NEXT: xorl 12(%eax), %esi
-; X86-NOSSE-NEXT: orl %ecx, %esi
-; X86-NOSSE-NEXT: orl %edx, %esi
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length16_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl %esi
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movl $858927408, %ecx # imm = 0x33323130
-; X86-SSE1-NEXT: xorl (%eax), %ecx
-; X86-SSE1-NEXT: movl $926299444, %edx # imm = 0x37363534
-; X86-SSE1-NEXT: xorl 4(%eax), %edx
-; X86-SSE1-NEXT: orl %ecx, %edx
-; X86-SSE1-NEXT: movl $825243960, %ecx # imm = 0x31303938
-; X86-SSE1-NEXT: xorl 8(%eax), %ecx
-; X86-SSE1-NEXT: movl $892613426, %esi # imm = 0x35343332
-; X86-SSE1-NEXT: xorl 12(%eax), %esi
-; X86-SSE1-NEXT: orl %ecx, %esi
-; X86-SSE1-NEXT: orl %edx, %esi
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: popl %esi
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length16_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length24:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length24_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length24_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $24
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length24_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length24_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length24_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length24_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length24_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $24
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length24_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length31(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length31:
-; X86: # %bb.0:
-; X86-NEXT: pushl $31
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind
- ret i32 %m
-}
-
-define i1 @length31_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length31_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $31
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length31_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $31
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length31_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length31_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length31_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $31
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length31_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $31
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length31_eq_prefer128:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $31
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length31_eq_prefer128:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $31
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length31_eq_prefer128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length31_eq_prefer128:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length31_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $31
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length31_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $31
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length31_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length31_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length32:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length32_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length32_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $32
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length32_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length32_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length32_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length32_eq_prefer128:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length32_eq_prefer128:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $32
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq_prefer128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length32_eq_prefer128:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length32_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length32_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $32
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length32_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length48(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length48:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind
- ret i32 %m
-}
-
-define i1 @length48_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length48_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $48
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length48_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $48
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length48_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
-; X86-SSE2-NEXT: pand %xmm0, %xmm2
-; X86-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length48_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm1, %xmm2
-; X86-SSE41-NEXT: por %xmm0, %xmm2
-; X86-SSE41-NEXT: ptest %xmm2, %xmm2
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length48_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length48_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length48_eq_prefer128:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $48
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length48_eq_prefer128:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $48
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length48_eq_prefer128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
-; X86-SSE2-NEXT: pand %xmm0, %xmm2
-; X86-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length48_eq_prefer128:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm1, %xmm2
-; X86-SSE41-NEXT: por %xmm0, %xmm2
-; X86-SSE41-NEXT: ptest %xmm2, %xmm2
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length48_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $48
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length48_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $48
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length48_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE2-NEXT: pand %xmm0, %xmm2
-; X86-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length48_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE41-NEXT: por %xmm0, %xmm2
-; X86-SSE41-NEXT: ptest %xmm2, %xmm2
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length63(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length63:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind
- ret i32 %m
-}
-
-define i1 @length63_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length63_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $63
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length63_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $63
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length63_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
-; X86-SSE2-NEXT: movdqu 47(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu 47(%eax), %xmm3
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm3
-; X86-SSE2-NEXT: pand %xmm2, %xmm3
-; X86-SSE2-NEXT: pand %xmm0, %xmm3
-; X86-SSE2-NEXT: pmovmskb %xmm3, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length63_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm1, %xmm2
-; X86-SSE41-NEXT: movdqu 47(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu 47(%eax), %xmm3
-; X86-SSE41-NEXT: pxor %xmm1, %xmm3
-; X86-SSE41-NEXT: por %xmm2, %xmm3
-; X86-SSE41-NEXT: por %xmm0, %xmm3
-; X86-SSE41-NEXT: ptest %xmm3, %xmm3
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length63_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length63_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length63_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $63
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length63_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $63
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length63_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: movdqu 47(%eax), %xmm3
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE2-NEXT: pand %xmm3, %xmm2
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length63_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: movdqu 47(%eax), %xmm3
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE41-NEXT: por %xmm3, %xmm2
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length64:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length64_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $64
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length64_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $64
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length64_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
-; X86-SSE2-NEXT: movdqu 48(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu 48(%eax), %xmm3
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm3
-; X86-SSE2-NEXT: pand %xmm2, %xmm3
-; X86-SSE2-NEXT: pand %xmm0, %xmm3
-; X86-SSE2-NEXT: pmovmskb %xmm3, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length64_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm1, %xmm2
-; X86-SSE41-NEXT: movdqu 48(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu 48(%eax), %xmm3
-; X86-SSE41-NEXT: pxor %xmm1, %xmm3
-; X86-SSE41-NEXT: por %xmm2, %xmm3
-; X86-SSE41-NEXT: por %xmm0, %xmm3
-; X86-SSE41-NEXT: ptest %xmm3, %xmm3
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length64_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length64_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length64_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $64
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length64_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $64
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length64_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE2-NEXT: movdqu 48(%eax), %xmm3
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE2-NEXT: pand %xmm3, %xmm2
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length64_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2
-; X86-SSE41-NEXT: movdqu 48(%eax), %xmm3
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE41-NEXT: por %xmm3, %xmm2
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length96(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length96:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind
- ret i32 %m
-}
-
-define i1 @length96_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length96_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length96_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length96_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_eq_const(ptr %X) nounwind {
-; X86-LABEL: length96_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length127(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length127:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind
- ret i32 %m
-}
-
-define i1 @length127_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length127_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length127_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length127_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_eq_const(ptr %X) nounwind {
-; X86-LABEL: length127_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length128(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length128:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind
- ret i32 %m
-}
-
-define i1 @length128_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length128_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length128_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length128_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_eq_const(ptr %X) nounwind {
-; X86-LABEL: length128_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length192(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length192:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind
- ret i32 %m
-}
-
-define i1 @length192_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length192_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length192_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length192_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_eq_const(ptr %X) nounwind {
-; X86-LABEL: length192_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length255(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length255:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind
- ret i32 %m
-}
-
-define i1 @length255_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length255_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length255_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length255_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_eq_const(ptr %X) nounwind {
-; X86-LABEL: length255_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length256(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length256:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind
- ret i32 %m
-}
-
-define i1 @length256_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length256_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length256_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length256_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_eq_const(ptr %X) nounwind {
-; X86-LABEL: length256_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length384(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length384:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind
- ret i32 %m
-}
-
-define i1 @length384_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length384_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length384_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length384_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_eq_const(ptr %X) nounwind {
-; X86-LABEL: length384_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length511(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length511:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind
- ret i32 %m
-}
-
-define i1 @length511_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length511_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length511_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length511_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_eq_const(ptr %X) nounwind {
-; X86-LABEL: length511_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length512(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length512:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind
- ret i32 %m
-}
-
-define i1 @length512_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length512_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length512_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length512_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_eq_const(ptr %X) nounwind {
-; X86-LABEL: length512_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks that we do not do stupid things with huge sizes.
-define i32 @huge_length(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: huge_length:
-; X86: # %bb.0:
-; X86-NEXT: pushl $-1
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
- ret i32 %m
-}
-
-define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: huge_length_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $-1
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks non-constant sizes.
-define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind {
-; X86-LABEL: nonconst_length:
-; X86: # %bb.0:
-; X86-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
- ret i32 %m
-}
-
-define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind {
-; X86-LABEL: nonconst_length_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
deleted file mode 100644
index 6eb02bfc1fd0c3..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ /dev/null
@@ -1,4006 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way.
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
- at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i64)
-
-define i32 @length0(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length0:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- ret i32 %m
- }
-
-define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length0_eq:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length0_lt:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length2(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_lt:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_gt:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind {
-; X64-LABEL: length2_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_eq_nobuiltin_attr:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $2, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length3:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: movzwl (%rsi), %edx
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: rolw $8, %dx
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: jne .LBB9_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 2(%rdi), %eax
-; X64-NEXT: movzbl 2(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB9_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length3_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: xorw (%rsi), %ax
-; X64-NEXT: movzbl 2(%rdi), %ecx
-; X64-NEXT: xorb 2(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orw %ax, %cx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4_lt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
-; X64-NEXT: bswapl %eax
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: setb %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4_gt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
-; X64-NEXT: bswapl %eax
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: seta %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind {
-; X64-LABEL: length4_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length5:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB16_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 4(%rdi), %eax
-; X64-NEXT: movzbl 4(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB16_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length5_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: movzbl 4(%rdi), %ecx
-; X64-NEXT: xorb 4(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length5_lt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB18_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 4(%rdi), %eax
-; X64-NEXT: movzbl 4(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB18_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length7(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length7:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB19_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 3(%rdi), %ecx
-; X64-NEXT: movl 3(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: je .LBB19_3
-; X64-NEXT: .LBB19_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB19_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- ret i32 %m
-}
-
-define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length7_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl 3(%rdi), %ecx
-; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: xorl 3(%rsi), %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length7_lt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB21_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 3(%rdi), %ecx
-; X64-NEXT: movl 3(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: je .LBB21_3
-; X64-NEXT: .LBB21_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB21_3: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length8:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length8_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind {
-; X64-LABEL: length8_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length9_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movzbl 8(%rdi), %ecx
-; X64-NEXT: xorb 8(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length10_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movzwl 8(%rdi), %ecx
-; X64-NEXT: xorw 8(%rsi), %cx
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length11_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 3(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 3(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length12_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: xorl 8(%rsi), %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length12:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB29_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB29_3
-; X64-NEXT: .LBB29_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB29_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- ret i32 %m
-}
-
-define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length13_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 5(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 5(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length14_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 6(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 6(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length15_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 7(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 7(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length16:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB33_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB33_3
-; X64-NEXT: .LBB33_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB33_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length16_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length16_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm0, %xmm1
-; X64-SSE41-NEXT: ptest %xmm1, %xmm1
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length16_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length16_lt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB35_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB35_3
-; X64-NEXT: .LBB35_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB35_3: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length16_gt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB36_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: movq 8(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB36_3
-; X64-NEXT: .LBB36_2: # %res_block
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
-; X64-NEXT: .LBB36_3: # %endblock
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length16_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length16_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length16_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length24:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB38_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB38_3
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rcx
-; X64-NEXT: movq 16(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB38_4
-; X64-NEXT: .LBB38_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB38_4: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length24_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: pand %xmm1, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length24_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm0, %xmm1
-; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: por %xmm1, %xmm2
-; X64-SSE41-NEXT: ptest %xmm2, %xmm2
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length24_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
-; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length24_lt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB40_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB40_3
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rcx
-; X64-NEXT: movq 16(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB40_4
-; X64-NEXT: .LBB40_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB40_4: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length24_gt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB41_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: movq 8(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB41_3
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rax
-; X64-NEXT: movq 16(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB41_4
-; X64-NEXT: .LBB41_3: # %res_block
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
-; X64-NEXT: .LBB41_4: # %endblock
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length24_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length24_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length24_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length31(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length31:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB43_4
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB43_4
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rcx
-; X64-NEXT: movq 16(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB43_4
-; X64-NEXT: # %bb.3: # %loadbb3
-; X64-NEXT: movq 23(%rdi), %rcx
-; X64-NEXT: movq 23(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB43_5
-; X64-NEXT: .LBB43_4: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB43_5: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
- ret i32 %m
-}
-
-define i1 @length31_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length31_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length31_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length31_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length31_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length31_lt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB45_4
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB45_4
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rcx
-; X64-NEXT: movq 16(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB45_4
-; X64-NEXT: # %bb.3: # %loadbb3
-; X64-NEXT: movq 23(%rdi), %rcx
-; X64-NEXT: movq 23(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB45_5
-; X64-NEXT: .LBB45_4: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB45_5: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length31_gt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB46_4
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: movq 8(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB46_4
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rax
-; X64-NEXT: movq 16(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB46_4
-; X64-NEXT: # %bb.3: # %loadbb3
-; X64-NEXT: movq 23(%rdi), %rax
-; X64-NEXT: movq 23(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB46_5
-; X64-NEXT: .LBB46_4: # %res_block
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
-; X64-NEXT: .LBB46_5: # %endblock
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X64-SSE2-LABEL: length31_eq_prefer128:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length31_eq_prefer128:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length31_eq_prefer128:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length31_eq_prefer128:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length31_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length31_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length31_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length31_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length32:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB49_4
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB49_4
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rcx
-; X64-NEXT: movq 16(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB49_4
-; X64-NEXT: # %bb.3: # %loadbb3
-; X64-NEXT: movq 24(%rdi), %rcx
-; X64-NEXT: movq 24(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB49_5
-; X64-NEXT: .LBB49_4: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB49_5: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length32_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length32_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length32_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length32_lt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB51_4
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB51_4
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rcx
-; X64-NEXT: movq 16(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB51_4
-; X64-NEXT: # %bb.3: # %loadbb3
-; X64-NEXT: movq 24(%rdi), %rcx
-; X64-NEXT: movq 24(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB51_5
-; X64-NEXT: .LBB51_4: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB51_5: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length32_gt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB52_4
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: movq 8(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB52_4
-; X64-NEXT: # %bb.2: # %loadbb2
-; X64-NEXT: movq 16(%rdi), %rax
-; X64-NEXT: movq 16(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB52_4
-; X64-NEXT: # %bb.3: # %loadbb3
-; X64-NEXT: movq 24(%rdi), %rax
-; X64-NEXT: movq 24(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB52_5
-; X64-NEXT: .LBB52_4: # %res_block
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
-; X64-NEXT: .LBB52_5: # %endblock
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X64-SSE2-LABEL: length32_eq_prefer128:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length32_eq_prefer128:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length32_eq_prefer128:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length32_eq_prefer128:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length32_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length32_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length32_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length48(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length48:
-; X64: # %bb.0:
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
- ret i32 %m
-}
-
-define i1 @length48_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length48_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm3
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm3
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm3, %xmm0
-; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1
-; X64-SSE2-NEXT: pand %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length48_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm3
-; X64-SSE41-NEXT: pxor %xmm0, %xmm3
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm3, %xmm0
-; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm2, %xmm1
-; X64-SSE41-NEXT: por %xmm0, %xmm1
-; X64-SSE41-NEXT: ptest %xmm1, %xmm1
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length48_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1
-; X64-AVX1-NEXT: vmovups 32(%rsi), %xmm2
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length48_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX2-NEXT: vmovdqu 32(%rsi), %xmm2
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length48_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length48_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length48_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length48_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X64-SSE2-LABEL: length48_eq_prefer128:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm3
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm3
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm3, %xmm0
-; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1
-; X64-SSE2-NEXT: pand %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length48_eq_prefer128:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm3
-; X64-SSE41-NEXT: pxor %xmm0, %xmm3
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm3, %xmm0
-; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm2, %xmm1
-; X64-SSE41-NEXT: por %xmm0, %xmm1
-; X64-SSE41-NEXT: ptest %xmm1, %xmm1
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length48_eq_prefer128:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
-; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor 32(%rsi), %xmm2, %xmm1
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length48_eq_prefer128:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm4
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm5
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm4, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm0, %k1
-; X64-MIC-AVX-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm5, %zmm2, %k1
-; X64-MIC-AVX-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length48_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; X64-SSE2-NEXT: pand %xmm0, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length48_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; X64-SSE41-NEXT: por %xmm0, %xmm2
-; X64-SSE41-NEXT: ptest %xmm2, %xmm2
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length48_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length48_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length48_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length48_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length63(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length63:
-; X64: # %bb.0:
-; X64-NEXT: movl $63, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
- ret i32 %m
-}
-
-define i1 @length63_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length63_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: movdqu 47(%rdi), %xmm3
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm4
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm4, %xmm0
-; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1
-; X64-SSE2-NEXT: movdqu 47(%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm3, %xmm2
-; X64-SSE2-NEXT: pand %xmm1, %xmm2
-; X64-SSE2-NEXT: pand %xmm0, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length63_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: movdqu 47(%rdi), %xmm3
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm4
-; X64-SSE41-NEXT: pxor %xmm0, %xmm4
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm4, %xmm0
-; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm2, %xmm1
-; X64-SSE41-NEXT: movdqu 47(%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm3, %xmm2
-; X64-SSE41-NEXT: por %xmm1, %xmm2
-; X64-SSE41-NEXT: por %xmm0, %xmm2
-; X64-SSE41-NEXT: ptest %xmm2, %xmm2
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length63_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps 31(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length63_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length63_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length63_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2
-; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length63_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $63, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length63_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $63, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length63_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: movdqu 47(%rdi), %xmm3
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; X64-SSE2-NEXT: pand %xmm3, %xmm2
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length63_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: movdqu 47(%rdi), %xmm3
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; X64-SSE41-NEXT: por %xmm3, %xmm2
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length63_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length63_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length63_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length63_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length64:
-; X64: # %bb.0:
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length64_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: movdqu 48(%rdi), %xmm3
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm4
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm4, %xmm0
-; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1
-; X64-SSE2-NEXT: movdqu 48(%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm3, %xmm2
-; X64-SSE2-NEXT: pand %xmm1, %xmm2
-; X64-SSE2-NEXT: pand %xmm0, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length64_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: movdqu 48(%rdi), %xmm3
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm4
-; X64-SSE41-NEXT: pxor %xmm0, %xmm4
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm4, %xmm0
-; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm2, %xmm1
-; X64-SSE41-NEXT: movdqu 48(%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm3, %xmm2
-; X64-SSE41-NEXT: por %xmm1, %xmm2
-; X64-SSE41-NEXT: por %xmm0, %xmm2
-; X64-SSE41-NEXT: ptest %xmm2, %xmm2
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length64_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-AVX512-NEXT: kortestw %k0, %k0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length64_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm3
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length64_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length64_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length64_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length64_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE2-NEXT: movdqu 48(%rdi), %xmm3
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; X64-SSE2-NEXT: pand %xmm3, %xmm2
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length64_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2
-; X64-SSE41-NEXT: movdqu 48(%rdi), %xmm3
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; X64-SSE41-NEXT: por %xmm3, %xmm2
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length64_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-AVX512-NEXT: kortestw %k0, %k0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length64_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length64_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length96(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length96:
-; X64: # %bb.0:
-; X64-NEXT: movl $96, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
- ret i32 %m
-}
-
-define i1 @length96_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length96_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $96, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length96_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm1
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length96_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm1
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length96_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512BW-NEXT: vmovdqu 64(%rsi), %ymm2
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb %zmm2, %zmm1, %k1
-; X64-AVX512BW-NEXT: kortestq %k1, %k0
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length96_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; X64-AVX512F-NEXT: kortestw %k1, %k0
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length96_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm3
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm4
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rsi), %ymm5
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm1, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm0, %k1
-; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm5, %zmm2, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length96_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length96_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $96, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length96_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $96, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length96_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $96, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length96_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length96_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length96_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1
-; X64-AVX512BW-NEXT: kortestq %k1, %k0
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length96_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1
-; X64-AVX512F-NEXT: kortestw %k1, %k0
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length96_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [926299444,825243960,892613426,959985462,858927408,926299444,825243960,892613426]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm0, %zmm2, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length96_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length127(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length127:
-; X64: # %bb.0:
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
- ret i32 %m
-}
-
-define i1 @length127_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length127_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $127, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length127_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; X64-AVX1-NEXT: vmovups 95(%rdi), %ymm3
-; X64-AVX1-NEXT: vxorps 95(%rsi), %ymm3, %ymm3
-; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm2
-; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length127_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3
-; X64-AVX2-NEXT: vpxor 95(%rsi), %ymm3, %ymm3
-; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm2
-; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length127_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb 63(%rsi), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length127_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length127_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3
-; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm4
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm5
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rsi), %ymm6
-; X64-MIC-AVX2-NEXT: vmovdqu 95(%rsi), %ymm7
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm7, %zmm3, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm6, %zmm2, %k1
-; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm5, %zmm1, %k1
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm0, %k2
-; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length127_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length127_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length127_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length127_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $127, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length127_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; X64-AVX1-NEXT: vmovups 95(%rdi), %ymm3
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length127_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length127_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+63(%rip), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length127_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length127_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [943142453,842084409,909456435,809056311,875770417,943142453,842084409,909456435]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm3, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [926299444,825243960,892613426,959985462,858927408,926299444,825243960,892613426]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
-; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k2
-; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length127_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length128(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length128:
-; X64: # %bb.0:
-; X64-NEXT: movl $128, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
- ret i32 %m
-}
-
-define i1 @length128_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length128_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $128, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length128_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; X64-AVX1-NEXT: vmovups 96(%rdi), %ymm3
-; X64-AVX1-NEXT: vxorps 96(%rsi), %ymm3, %ymm3
-; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm2
-; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length128_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3
-; X64-AVX2-NEXT: vpxor 96(%rsi), %ymm3, %ymm3
-; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm2
-; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length128_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length128_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length128_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3
-; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm4
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm5
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rsi), %ymm6
-; X64-MIC-AVX2-NEXT: vmovdqu 96(%rsi), %ymm7
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm7, %zmm3, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm6, %zmm2, %k1
-; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm5, %zmm1, %k1
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm0, %k2
-; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length128_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length128_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $128, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length128_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $128, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length128_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $128, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length128_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; X64-AVX1-NEXT: vmovups 96(%rdi), %ymm3
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length128_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length128_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length128_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length128_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [959985462,858927408,926299444,825243960,892613426,959985462,858927408,926299444]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm3, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [926299444,825243960,892613426,959985462,858927408,926299444,825243960,892613426]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
-; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k2
-; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length128_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length192(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length192:
-; X64: # %bb.0:
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
- ret i32 %m
-}
-
-define i1 @length192_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length192_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $192, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length192_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $192, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length192_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $192, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length192_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1
-; X64-AVX512BW-NEXT: korq %k0, %k1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb 128(%rsi), %zmm2, %k1
-; X64-AVX512BW-NEXT: kortestq %k1, %k0
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length192_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1
-; X64-AVX512F-NEXT: kortestw %k1, %k0
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length192_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $192, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length192_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length192_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length192_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length192_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $192, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length192_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $192, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length192_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $.L.str, %esi
-; X64-AVX2-NEXT: movl $192, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length192_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1
-; X64-AVX512BW-NEXT: korq %k0, %k1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+128(%rip), %zmm2, %k1
-; X64-AVX512BW-NEXT: kortestq %k1, %k0
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length192_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1
-; X64-AVX512F-NEXT: kortestw %k1, %k0
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length192_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $.L.str, %esi
-; X64-MIC-AVX2-NEXT: movl $192, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length192_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length255(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length255:
-; X64: # %bb.0:
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
- ret i32 %m
-}
-
-define i1 @length255_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length255_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $255, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length255_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $255, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length255_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $255, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length255_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512BW-NEXT: vmovdqu64 191(%rdi), %zmm3
-; X64-AVX512BW-NEXT: vpcmpneqb 191(%rsi), %zmm3, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb 128(%rsi), %zmm2, %k1
-; X64-AVX512BW-NEXT: korq %k0, %k1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k1
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k2
-; X64-AVX512BW-NEXT: korq %k1, %k2, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length255_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3
-; X64-AVX512F-NEXT: vpcmpneqd 191(%rsi), %zmm3, %k0
-; X64-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1
-; X64-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2
-; X64-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length255_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $255, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length255_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 191(%rsi), %zmm3, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1
-; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2
-; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length255_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length255_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length255_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $255, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length255_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $255, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length255_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $.L.str, %esi
-; X64-AVX2-NEXT: movl $255, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length255_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512BW-NEXT: vmovdqu64 191(%rdi), %zmm3
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+191(%rip), %zmm3, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+128(%rip), %zmm2, %k1
-; X64-AVX512BW-NEXT: korq %k0, %k1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k2
-; X64-AVX512BW-NEXT: korq %k1, %k2, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length255_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+191(%rip), %zmm3, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1
-; X64-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2
-; X64-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length255_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $.L.str, %esi
-; X64-MIC-AVX2-NEXT: movl $255, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length255_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+191(%rip), %zmm3, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1
-; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2
-; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length256(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length256:
-; X64: # %bb.0:
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
- ret i32 %m
-}
-
-define i1 @length256_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length256_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $256, %edx # imm = 0x100
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length256_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $256, %edx # imm = 0x100
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length256_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $256, %edx # imm = 0x100
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length256_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm3
-; X64-AVX512BW-NEXT: vpcmpneqb 192(%rsi), %zmm3, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb 128(%rsi), %zmm2, %k1
-; X64-AVX512BW-NEXT: korq %k0, %k1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k1
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k2
-; X64-AVX512BW-NEXT: korq %k1, %k2, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length256_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3
-; X64-AVX512F-NEXT: vpcmpneqd 192(%rsi), %zmm3, %k0
-; X64-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1
-; X64-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2
-; X64-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length256_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $256, %edx # imm = 0x100
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length256_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 192(%rsi), %zmm3, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1
-; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2
-; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length256_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length256_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length256_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $256, %edx # imm = 0x100
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length256_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $256, %edx # imm = 0x100
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length256_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $.L.str, %esi
-; X64-AVX2-NEXT: movl $256, %edx # imm = 0x100
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length256_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm3
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+192(%rip), %zmm3, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+128(%rip), %zmm2, %k1
-; X64-AVX512BW-NEXT: korq %k0, %k1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k2
-; X64-AVX512BW-NEXT: korq %k1, %k2, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length256_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+192(%rip), %zmm3, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1
-; X64-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2
-; X64-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length256_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $.L.str, %esi
-; X64-MIC-AVX2-NEXT: movl $256, %edx # imm = 0x100
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length256_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+192(%rip), %zmm3, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1
-; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2
-; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length384(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length384:
-; X64: # %bb.0:
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind
- ret i32 %m
-}
-
-define i1 @length384_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length384_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length384_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length384_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_eq_const(ptr %X) nounwind {
-; X64-LABEL: length384_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length511(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length511:
-; X64: # %bb.0:
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind
- ret i32 %m
-}
-
-define i1 @length511_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length511_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length511_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length511_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_eq_const(ptr %X) nounwind {
-; X64-LABEL: length511_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length512(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length512:
-; X64: # %bb.0:
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind
- ret i32 %m
-}
-
-define i1 @length512_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length512_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length512_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length512_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_eq_const(ptr %X) nounwind {
-; X64-LABEL: length512_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks that we do not do stupid things with huge sizes.
-define i32 @huge_length(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: huge_length:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
- ret i32 %m
-}
-
-define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: huge_length_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks non-constant sizes.
-define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind {
-; X64-LABEL: nonconst_length:
-; X64: # %bb.0:
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
- ret i32 %m
-}
-
-define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind {
-; X64-LABEL: nonconst_length_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
deleted file mode 100644
index 762691151f4bd3..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ /dev/null
@@ -1,583 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i32)
-declare dso_local i32 @bcmp(ptr, ptr, i32)
-
-define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length2:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length2_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: cmpw (%eax), %cx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind optsize {
-; X86-LABEL: length2_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length2_eq_nobuiltin_attr:
-; X86: # %bb.0:
-; X86-NEXT: pushl $2
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length3:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzwl (%ecx), %esi
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: rolw $8, %si
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB4_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 2(%eax), %eax
-; X86-NEXT: movzbl 2(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB4_2
-; X86-NEXT: .LBB4_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB4_2: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length3_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: xorw (%eax), %dx
-; X86-NEXT: movb 2(%ecx), %cl
-; X86-NEXT: xorb 2(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orw %dx, %ax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length4:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length4_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: cmpl (%eax), %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind optsize {
-; X86-LABEL: length4_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length5:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: bswapl %edx
-; X86-NEXT: bswapl %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB9_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 4(%eax), %eax
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB9_2
-; X86-NEXT: .LBB9_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB9_2: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length5_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: movb 4(%ecx), %cl
-; X86-NEXT: xorb 4(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length8:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB11_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB11_3
-; X86-NEXT: .LBB11_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB11_3: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length8_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind optsize {
-; X86-LABEL: length8_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
-; X86-NEXT: xorl (%eax), %ecx
-; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
-; X86-NEXT: xorl 4(%eax), %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length12_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length12:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- ret i32 %m
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length16:
-; X86: # %bb.0:
-; X86-NEXT: pushl $16
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize {
-; X86-NOSSE-LABEL: length16_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind optsize {
-; X86-NOSSE-LABEL: length16_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length24:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize {
-; X86-NOSSE-LABEL: length24_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind optsize {
-; X86-NOSSE-LABEL: length24_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length32:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize {
-; X86-NOSSE-LABEL: length32_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind optsize {
-; X86-NOSSE-LABEL: length32_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: length64:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize {
-; X86-LABEL: length64_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind optsize {
-; X86-LABEL: length64_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize {
-; X86-LABEL: bcmp_length2:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw (%ecx), %dx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
deleted file mode 100644
index c0c7b98d471cd4..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ /dev/null
@@ -1,596 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i64)
-declare dso_local i32 @bcmp(ptr, ptr, i64)
-
-define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length2:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length2_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind optsize {
-; X64-LABEL: length2_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length2_eq_nobuiltin_attr:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $2, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length3:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: movzwl (%rsi), %edx
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: rolw $8, %dx
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: jne .LBB4_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 2(%rdi), %eax
-; X64-NEXT: movzbl 2(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB4_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length3_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: xorw (%rsi), %ax
-; X64-NEXT: movb 2(%rdi), %cl
-; X64-NEXT: xorb 2(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orw %ax, %cx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length4:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length4_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind optsize {
-; X64-LABEL: length4_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length5:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB9_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 4(%rdi), %eax
-; X64-NEXT: movzbl 4(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB9_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length5_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: movb 4(%rdi), %cl
-; X64-NEXT: xorb 4(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length8:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length8_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind optsize {
-; X64-LABEL: length8_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length12_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: xorl 8(%rsi), %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length12:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB15_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB15_3
-; X64-NEXT: .LBB15_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB15_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- ret i32 %m
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length16:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB16_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB16_3
-; X64-NEXT: .LBB16_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB16_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize {
-; X64-SSE2-LABEL: length16_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind optsize {
-; X64-SSE2-LABEL: length16_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length24:
-; X64: # %bb.0:
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize {
-; X64-SSE2-LABEL: length24_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: pand %xmm1, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind optsize {
-; X64-SSE2-LABEL: length24_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length32:
-; X64: # %bb.0:
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize {
-; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind optsize {
-; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: length64:
-; X64: # %bb.0:
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize {
-; X64-SSE2-LABEL: length64_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $64, %edx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind optsize {
-; X64-SSE2-LABEL: length64_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $.L.str, %esi
-; X64-SSE2-NEXT: movl $64, %edx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize {
-; X64-LABEL: bcmp_length2:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw (%rsi), %cx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
deleted file mode 100644
index cb45fd3ebb9068..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ /dev/null
@@ -1,600 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i32)
-declare dso_local i32 @bcmp(ptr, ptr, i32)
-
-define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length2:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length2_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: cmpw (%eax), %cx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
-; X86-LABEL: length2_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length2_eq_nobuiltin_attr:
-; X86: # %bb.0:
-; X86-NEXT: pushl $2
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length3:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzwl (%ecx), %esi
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: rolw $8, %si
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB4_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 2(%eax), %eax
-; X86-NEXT: movzbl 2(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB4_2
-; X86-NEXT: .LBB4_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB4_2: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length3_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: xorw (%eax), %dx
-; X86-NEXT: movb 2(%ecx), %cl
-; X86-NEXT: xorb 2(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orw %dx, %ax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length4:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length4_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: cmpl (%eax), %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind !prof !14 {
-; X86-LABEL: length4_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length5:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: bswapl %edx
-; X86-NEXT: bswapl %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB9_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 4(%eax), %eax
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB9_2
-; X86-NEXT: .LBB9_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB9_2: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length5_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: movb 4(%ecx), %cl
-; X86-NEXT: xorb 4(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length8:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB11_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB11_3
-; X86-NEXT: .LBB11_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB11_3: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length8_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind !prof !14 {
-; X86-LABEL: length8_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
-; X86-NEXT: xorl (%eax), %ecx
-; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
-; X86-NEXT: xorl 4(%eax), %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length12_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length12:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- ret i32 %m
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length16:
-; X86: # %bb.0:
-; X86-NEXT: pushl $16
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X86-NOSSE-LABEL: length16_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind !prof !14 {
-; X86-NOSSE-LABEL: length16_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length24:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X86-NOSSE-LABEL: length24_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind !prof !14 {
-; X86-NOSSE-LABEL: length24_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length32:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X86-NOSSE-LABEL: length32_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind !prof !14 {
-; X86-NOSSE-LABEL: length32_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: length64:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X86-LABEL: length64_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind !prof !14 {
-; X86-LABEL: length64_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 {
-; X86-LABEL: bcmp_length2:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw (%ecx), %dx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"ProfileSummary", !1}
-!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
-!2 = !{!"ProfileFormat", !"InstrProf"}
-!3 = !{!"TotalCount", i32 10000}
-!4 = !{!"MaxCount", i32 10}
-!5 = !{!"MaxInternalCount", i32 1}
-!6 = !{!"MaxFunctionCount", i32 1000}
-!7 = !{!"NumCounts", i32 3}
-!8 = !{!"NumFunctions", i32 3}
-!9 = !{!"DetailedSummary", !10}
-!10 = !{!11, !12, !13}
-!11 = !{i32 10000, i32 100, i32 1}
-!12 = !{i32 999000, i32 100, i32 1}
-!13 = !{i32 999999, i32 1, i32 2}
-!14 = !{!"function_entry_count", i32 0}
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
deleted file mode 100644
index 720344a22e43b5..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ /dev/null
@@ -1,613 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i64)
-declare dso_local i32 @bcmp(ptr, ptr, i64)
-
-define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length2:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length2_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
-; X64-LABEL: length2_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length2_eq_nobuiltin_attr:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $2, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length3:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: movzwl (%rsi), %edx
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: rolw $8, %dx
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: jne .LBB4_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 2(%rdi), %eax
-; X64-NEXT: movzbl 2(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB4_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length3_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: xorw (%rsi), %ax
-; X64-NEXT: movb 2(%rdi), %cl
-; X64-NEXT: xorb 2(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orw %ax, %cx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length4:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length4_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind !prof !14 {
-; X64-LABEL: length4_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length5:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB9_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 4(%rdi), %eax
-; X64-NEXT: movzbl 4(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB9_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length5_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: movb 4(%rdi), %cl
-; X64-NEXT: xorb 4(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length8:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length8_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind !prof !14 {
-; X64-LABEL: length8_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length12_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: xorl 8(%rsi), %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length12:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB15_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB15_3
-; X64-NEXT: .LBB15_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB15_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- ret i32 %m
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length16:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB16_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB16_3
-; X64-NEXT: .LBB16_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB16_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X64-SSE2-LABEL: length16_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind !prof !14 {
-; X64-SSE2-LABEL: length16_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length24:
-; X64: # %bb.0:
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X64-SSE2-LABEL: length24_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: pand %xmm1, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind !prof !14 {
-; X64-SSE2-LABEL: length24_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length32:
-; X64: # %bb.0:
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind !prof !14 {
-; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: length64:
-; X64: # %bb.0:
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 {
-; X64-SSE2-LABEL: length64_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $64, %edx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind !prof !14 {
-; X64-SSE2-LABEL: length64_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $.L.str, %esi
-; X64-SSE2-NEXT: movl $64, %edx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 {
-; X64-LABEL: bcmp_length2:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw (%rsi), %cx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"ProfileSummary", !1}
-!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
-!2 = !{!"ProfileFormat", !"InstrProf"}
-!3 = !{!"TotalCount", i64 10000}
-!4 = !{!"MaxCount", i64 10}
-!5 = !{!"MaxInternalCount", i64 1}
-!6 = !{!"MaxFunctionCount", i64 1000}
-!7 = !{!"NumCounts", i64 3}
-!8 = !{!"NumFunctions", i64 3}
-!9 = !{!"DetailedSummary", !10}
-!10 = !{!11, !12, !13}
-!11 = !{i32 10000, i64 100, i32 1}
-!12 = !{i32 999000, i64 100, i32 1}
-!13 = !{i32 999999, i64 1, i32 2}
-!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
deleted file mode 100644
index a63402cea20962..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ /dev/null
@@ -1,2429 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
- at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i32)
-
-define i32 @length0(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length0:
-; X86: # %bb.0:
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
- ret i32 %m
- }
-
-define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length0_eq:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length0_lt:
-; X86: # %bb.0:
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length2(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- ret i32 %m
-}
-
-define i32 @length2_const(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_gt_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: cmpw (%eax), %cx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_lt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_gt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind {
-; X86-LABEL: length2_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length2_eq_nobuiltin_attr:
-; X86: # %bb.0:
-; X86-NEXT: pushl $2
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length3:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzwl (%ecx), %esi
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: rolw $8, %si
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB11_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 2(%eax), %eax
-; X86-NEXT: movzbl 2(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-; X86-NEXT: .LBB11_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length3_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: xorw (%eax), %dx
-; X86-NEXT: movzbl 2(%ecx), %ecx
-; X86-NEXT: xorb 2(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orw %dx, %ax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: cmpl (%eax), %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4_lt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: setb %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length4_gt:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind {
-; X86-LABEL: length4_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length5:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: bswapl %edx
-; X86-NEXT: bswapl %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB18_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 4(%eax), %eax
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-; X86-NEXT: .LBB18_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length5_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: xorb 4(%eax), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length5_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: bswapl %edx
-; X86-NEXT: bswapl %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB20_3
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movzbl 4(%eax), %eax
-; X86-NEXT: movzbl 4(%ecx), %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB20_2
-; X86-NEXT: .LBB20_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB20_2: # %endblock
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length7(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length7:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB21_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 3(%esi), %ecx
-; X86-NEXT: movl 3(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB21_3
-; X86-NEXT: .LBB21_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB21_3: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
- ret i32 %m
-}
-
-define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length7_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB22_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 3(%esi), %ecx
-; X86-NEXT: movl 3(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB22_3
-; X86-NEXT: .LBB22_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB22_3: # %endblock
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length7_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 3(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 3(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length8:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: jne .LBB24_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: je .LBB24_3
-; X86-NEXT: .LBB24_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: .LBB24_3: # %endblock
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length8_eq:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: xorl 4(%eax), %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind {
-; X86-LABEL: length8_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
-; X86-NEXT: xorl (%eax), %ecx
-; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
-; X86-NEXT: xorl 4(%eax), %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length9_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $9
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length10_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $10
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length11_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $11
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length12_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length12:
-; X86: # %bb.0:
-; X86-NEXT: pushl $12
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
- ret i32 %m
-}
-
-define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length13_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $13
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length14_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $14
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length15(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length15:
-; X86: # %bb.0:
-; X86-NEXT: pushl $15
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind
- ret i32 %m
-}
-
-define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length15_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $15
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length15_const(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length15_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $15
-; X86-NEXT: pushl $.L.str+1
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind
- ret i32 %m
-}
-
-define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length15_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $15
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length15_gt_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $15
-; X86-NEXT: pushl $.L.str+1
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length16:
-; X86: # %bb.0:
-; X86-NEXT: pushl $16
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length16_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length16_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $16
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length16_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu (%eax), %xmm1
-; X86-SSE41-NEXT: pxor %xmm0, %xmm1
-; X86-SSE41-NEXT: ptest %xmm1, %xmm1
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length16_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $16
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length16_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $16
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length16_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $16
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length16_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $16
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length16_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length16_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length24:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length24_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length24_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $24
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length24_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length24_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length24_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $24
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length24_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $24
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length24_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $24
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length24_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length24_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length31(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length31:
-; X86: # %bb.0:
-; X86-NEXT: pushl $31
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind
- ret i32 %m
-}
-
-define i1 @length31_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length31_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $31
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length31_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $31
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length31_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length31_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length31_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $31
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length31_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $31
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length31_eq_prefer128:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $31
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length31_eq_prefer128:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $31
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length31_eq_prefer128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length31_eq_prefer128:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length31_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $31
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length31_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $31
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length31_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length31_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length32:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind {
-; X86-NOSSE-LABEL: length32_eq:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length32_eq:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $32
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length32_eq:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length32_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length32_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length32_eq_prefer128:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: sete %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length32_eq_prefer128:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $32
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: sete %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq_prefer128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE2-NEXT: movdqu (%eax), %xmm2
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: pand %xmm2, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: sete %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length32_eq_prefer128:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
-; X86-SSE41-NEXT: movdqu (%eax), %xmm2
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE41-NEXT: pxor %xmm1, %xmm0
-; X86-SSE41-NEXT: por %xmm2, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: sete %al
-; X86-SSE41-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind {
-; X86-NOSSE-LABEL: length32_eq_const:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl $32
-; X86-NOSSE-NEXT: pushl $.L.str
-; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: calll memcmp
-; X86-NOSSE-NEXT: addl $12, %esp
-; X86-NOSSE-NEXT: testl %eax, %eax
-; X86-NOSSE-NEXT: setne %al
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE1-LABEL: length32_eq_const:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl $32
-; X86-SSE1-NEXT: pushl $.L.str
-; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: calll memcmp
-; X86-SSE1-NEXT: addl $12, %esp
-; X86-SSE1-NEXT: testl %eax, %eax
-; X86-SSE1-NEXT: setne %al
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: length32_eq_const:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: pand %xmm1, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-SSE2-NEXT: setne %al
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE41-LABEL: length32_eq_const:
-; X86-SSE41: # %bb.0:
-; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movdqu (%eax), %xmm0
-; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE41-NEXT: por %xmm1, %xmm0
-; X86-SSE41-NEXT: ptest %xmm0, %xmm0
-; X86-SSE41-NEXT: setne %al
-; X86-SSE41-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length48(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length48:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind
- ret i32 %m
-}
-
-define i1 @length48_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length48_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length48_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length48_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X86-LABEL: length48_eq_prefer128:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_const(ptr %X) nounwind {
-; X86-LABEL: length48_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $48
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length63(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length63:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind
- ret i32 %m
-}
-
-define i1 @length63_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length63_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length63_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length63_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_eq_const(ptr %X) nounwind {
-; X86-LABEL: length63_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $63
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length64:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length64_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length64_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length64_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind {
-; X86-LABEL: length64_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $64
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length96(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length96:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind
- ret i32 %m
-}
-
-define i1 @length96_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length96_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length96_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length96_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_eq_const(ptr %X) nounwind {
-; X86-LABEL: length96_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $96
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length127(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length127:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind
- ret i32 %m
-}
-
-define i1 @length127_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length127_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length127_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length127_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_eq_const(ptr %X) nounwind {
-; X86-LABEL: length127_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $127
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length128(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length128:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind
- ret i32 %m
-}
-
-define i1 @length128_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length128_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length128_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length128_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_eq_const(ptr %X) nounwind {
-; X86-LABEL: length128_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $128
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length192(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length192:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind
- ret i32 %m
-}
-
-define i1 @length192_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length192_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length192_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length192_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_eq_const(ptr %X) nounwind {
-; X86-LABEL: length192_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $192
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length255(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length255:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind
- ret i32 %m
-}
-
-define i1 @length255_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length255_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length255_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length255_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_eq_const(ptr %X) nounwind {
-; X86-LABEL: length255_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $255
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length256(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length256:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind
- ret i32 %m
-}
-
-define i1 @length256_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length256_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length256_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length256_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_eq_const(ptr %X) nounwind {
-; X86-LABEL: length256_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $256 # imm = 0x100
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length384(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length384:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind
- ret i32 %m
-}
-
-define i1 @length384_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length384_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length384_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length384_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_eq_const(ptr %X) nounwind {
-; X86-LABEL: length384_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $384 # imm = 0x180
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length511(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length511:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind
- ret i32 %m
-}
-
-define i1 @length511_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length511_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length511_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length511_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_eq_const(ptr %X) nounwind {
-; X86-LABEL: length511_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $511 # imm = 0x1FF
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length512(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: length512:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind
- ret i32 %m
-}
-
-define i1 @length512_eq(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length512_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_lt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length512_lt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_gt(ptr %x, ptr %y) nounwind {
-; X86-LABEL: length512_gt:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %al
-; X86-NEXT: retl
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_eq_const(ptr %X) nounwind {
-; X86-LABEL: length512_eq_const:
-; X86: # %bb.0:
-; X86-NEXT: pushl $512 # imm = 0x200
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks that we do not do stupid things with huge sizes.
-define i32 @huge_length(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: huge_length:
-; X86: # %bb.0:
-; X86-NEXT: pushl $-1
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
- ret i32 %m
-}
-
-define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
-; X86-LABEL: huge_length_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl $-1
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks non-constant sizes.
-define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind {
-; X86-LABEL: nonconst_length:
-; X86: # %bb.0:
-; X86-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
- ret i32 %m
-}
-
-define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind {
-; X86-LABEL: nonconst_length_eq:
-; X86: # %bb.0:
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
deleted file mode 100644
index f5e7384362a92b..00000000000000
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ /dev/null
@@ -1,3065 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F
-
-; This tests codegen time inlining/optimization of memcmp
-; rdar://6480398
-
- at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
-
-declare dso_local i32 @memcmp(ptr, ptr, i64)
-
-define i32 @length0(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length0:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- ret i32 %m
- }
-
-define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length0_eq:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length0_lt:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length2(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- ret i32 %m
-}
-
-define i32 @length2_const(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_const:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- ret i32 %m
-}
-
-define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_gt_const:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_lt:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_gt:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_const(ptr %X) nounwind {
-; X64-LABEL: length2_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length2_eq_nobuiltin_attr:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $2, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length3(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length3:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: movzwl (%rsi), %edx
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: rolw $8, %dx
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: jne .LBB11_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 2(%rdi), %eax
-; X64-NEXT: movzbl 2(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB11_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- ret i32 %m
-}
-
-define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length3_eq:
-; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: xorw (%rsi), %ax
-; X64-NEXT: movzbl 2(%rdi), %ecx
-; X64-NEXT: xorb 2(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orw %ax, %cx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length4(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- ret i32 %m
-}
-
-define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4_lt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
-; X64-NEXT: bswapl %eax
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: setb %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length4_gt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
-; X64-NEXT: bswapl %eax
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: seta %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length4_eq_const(ptr %X) nounwind {
-; X64-LABEL: length4_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length5(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length5:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB18_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 4(%rdi), %eax
-; X64-NEXT: movzbl 4(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB18_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- ret i32 %m
-}
-
-define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length5_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: movzbl 4(%rdi), %ecx
-; X64-NEXT: xorb 4(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length5_lt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB20_3
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movzbl 4(%rdi), %eax
-; X64-NEXT: movzbl 4(%rsi), %ecx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB20_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length7(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length7:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB21_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 3(%rdi), %ecx
-; X64-NEXT: movl 3(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: je .LBB21_3
-; X64-NEXT: .LBB21_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB21_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- ret i32 %m
-}
-
-define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length7_lt:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: jne .LBB22_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 3(%rdi), %ecx
-; X64-NEXT: movl 3(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: je .LBB22_3
-; X64-NEXT: .LBB22_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB22_3: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length7_eq:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl 3(%rdi), %ecx
-; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: xorl 3(%rsi), %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length8(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length8:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- ret i32 %m
-}
-
-define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length8_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length8_eq_const(ptr %X) nounwind {
-; X64-LABEL: length8_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length9_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movzbl 8(%rdi), %ecx
-; X64-NEXT: xorb 8(%rsi), %cl
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length10_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movzwl 8(%rdi), %ecx
-; X64-NEXT: xorw 8(%rsi), %cx
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length11_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 3(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 3(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length12_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: xorl 8(%rsi), %ecx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: setne %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length12(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length12:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB31_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB31_3
-; X64-NEXT: .LBB31_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB31_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
- ret i32 %m
-}
-
-define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length13_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 5(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 5(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length14_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 6(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 6(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length15(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length15:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB34_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 7(%rdi), %rcx
-; X64-NEXT: movq 7(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB34_3
-; X64-NEXT: .LBB34_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB34_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- ret i32 %m
-}
-
-define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length15_lt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB35_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 7(%rdi), %rcx
-; X64-NEXT: movq 7(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB35_3
-; X64-NEXT: .LBB35_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB35_3: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- %c = icmp slt i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length15_const(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length15_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3544952156018063160, %rcx # imm = 0x3132333435363738
-; X64-NEXT: movq (%rdi), %rdx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rcx, %rdx
-; X64-NEXT: jne .LBB36_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movabsq $4051322327650219061, %rcx # imm = 0x3839303132333435
-; X64-NEXT: movq 7(%rdi), %rdx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rcx, %rdx
-; X64-NEXT: je .LBB36_3
-; X64-NEXT: .LBB36_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rcx, %rdx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB36_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
- ret i32 %m
-}
-
-define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length15_eq:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 7(%rdi), %rcx
-; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: xorq 7(%rsi), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length15_gt_const:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $3544952156018063160, %rax # imm = 0x3132333435363738
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rax, %rcx
-; X64-NEXT: jne .LBB38_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movabsq $4051322327650219061, %rax # imm = 0x3839303132333435
-; X64-NEXT: movq 7(%rdi), %rcx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rax, %rcx
-; X64-NEXT: je .LBB38_3
-; X64-NEXT: .LBB38_2: # %res_block
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rax, %rcx
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
-; X64-NEXT: .LBB38_3: # %endblock
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
- %c = icmp sgt i32 %m, 0
- ret i1 %c
-}
-
-; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
-
-define i32 @length16(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length16:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB39_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB39_3
-; X64-NEXT: .LBB39_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB39_3: # %endblock
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
- ret i32 %m
-}
-
-define i1 @length16_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length16_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length16_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm0, %xmm1
-; X64-SSE41-NEXT: ptest %xmm1, %xmm1
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length16_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length16_lt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: jne .LBB41_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: je .LBB41_3
-; X64-NEXT: .LBB41_2: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: .LBB41_3: # %endblock
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length16_gt:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: jne .LBB42_2
-; X64-NEXT: # %bb.1: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: movq 8(%rsi), %rcx
-; X64-NEXT: bswapq %rax
-; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB42_3
-; X64-NEXT: .LBB42_2: # %res_block
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
-; X64-NEXT: .LBB42_3: # %endblock
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length16_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length16_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length16_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length16_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length16_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
-
-define i32 @length24(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length24:
-; X64: # %bb.0:
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
- ret i32 %m
-}
-
-define i1 @length24_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length24_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: pand %xmm1, %xmm2
-; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length24_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE41-NEXT: pxor %xmm0, %xmm1
-; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: por %xmm1, %xmm2
-; X64-SSE41-NEXT: ptest %xmm2, %xmm2
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length24_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
-; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length24_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length24_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length24_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length24_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length24_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length24_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length24_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length31(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length31:
-; X64: # %bb.0:
-; X64-NEXT: movl $31, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
- ret i32 %m
-}
-
-define i1 @length31_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length31_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length31_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length31_eq:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length31_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length31_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $31, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length31_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $31, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X64-SSE2-LABEL: length31_eq_prefer128:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length31_eq_prefer128:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length31_eq_prefer128:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length31_eq_prefer128:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length31_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length31_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length31_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length31_eq_const:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: setne %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length31_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length32(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length32:
-; X64: # %bb.0:
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
- ret i32 %m
-}
-
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length32_eq:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length32_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length32_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length32_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length32_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X64-SSE2-LABEL: length32_eq_prefer128:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X64-SSE2-NEXT: pand %xmm2, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length32_eq_prefer128:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
-; X64-SSE41-NEXT: pxor %xmm0, %xmm2
-; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; X64-SSE41-NEXT: pxor %xmm1, %xmm0
-; X64-SSE41-NEXT: por %xmm2, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: sete %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX-LABEL: length32_eq_prefer128:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vptest %xmm0, %xmm0
-; X64-AVX-NEXT: sete %al
-; X64-AVX-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length32_eq_prefer128:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length32_eq_const(ptr %X) nounwind {
-; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: pand %xmm1, %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
-; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: retq
-;
-; X64-SSE41-LABEL: length32_eq_const:
-; X64-SSE41: # %bb.0:
-; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE41-NEXT: por %xmm1, %xmm0
-; X64-SSE41-NEXT: ptest %xmm0, %xmm0
-; X64-SSE41-NEXT: setne %al
-; X64-SSE41-NEXT: retq
-;
-; X64-AVX1-LABEL: length32_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length32_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length32_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-MIC-AVX-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length48(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length48:
-; X64: # %bb.0:
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
- ret i32 %m
-}
-
-define i1 @length48_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length48_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $48, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length48_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1
-; X64-AVX1-NEXT: vmovups 32(%rsi), %xmm2
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length48_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX2-NEXT: vmovdqu 32(%rsi), %xmm2
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length48_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length48_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length48_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length48_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
-; X64-LABEL: length48_eq_prefer128:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $48, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length48_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length48_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $48, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length48_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length48_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length48_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length48_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
- %c = icmp ne i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length63(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length63:
-; X64: # %bb.0:
-; X64-NEXT: movl $63, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
- ret i32 %m
-}
-
-define i1 @length63_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length63_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $63, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length63_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps 31(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length63_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length63_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length63_eq:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2
-; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: setne %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length63_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $63, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length63_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $63, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length63_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length63_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $63, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length63_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length63_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length63_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX-LABEL: length63_eq_const:
-; X64-MIC-AVX: # %bb.0:
-; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX-NEXT: sete %al
-; X64-MIC-AVX-NEXT: vzeroupper
-; X64-MIC-AVX-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length64(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length64:
-; X64: # %bb.0:
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
- ret i32 %m
-}
-
-define i1 @length64_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length64_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $64, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length64_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-AVX512-NEXT: kortestw %k0, %k0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length64_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm2
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm3
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length64_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length64_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length64_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length64_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length64_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $64, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
-; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vptest %ymm0, %ymm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: vzeroupper
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: length64_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-AVX512-NEXT: kortestw %k0, %k0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length64_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
-; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
-; X64-MIC-AVX2-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: vzeroupper
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length64_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length96(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length96:
-; X64: # %bb.0:
-; X64-NEXT: movl $96, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
- ret i32 %m
-}
-
-define i1 @length96_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length96_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $96, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length96_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $96, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length96_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $96, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length96_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512BW-NEXT: vmovdqu 64(%rsi), %ymm2
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb %zmm2, %zmm1, %k1
-; X64-AVX512BW-NEXT: kortestq %k1, %k0
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length96_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; X64-AVX512F-NEXT: kortestw %k1, %k0
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length96_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $96, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length96_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length96_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $96, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length96_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $96, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length96_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length96_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $96, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length96_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $96, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length96_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $.L.str, %esi
-; X64-AVX2-NEXT: movl $96, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length96_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1
-; X64-AVX512BW-NEXT: kortestq %k1, %k0
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length96_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1
-; X64-AVX512F-NEXT: kortestw %k1, %k0
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length96_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $.L.str, %esi
-; X64-MIC-AVX2-NEXT: movl $96, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length96_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length127(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length127:
-; X64: # %bb.0:
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
- ret i32 %m
-}
-
-define i1 @length127_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length127_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $127, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length127_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $127, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length127_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $127, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length127_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb 63(%rsi), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length127_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length127_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $127, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length127_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length127_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length127_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length127_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length127_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $127, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length127_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $127, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length127_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $.L.str, %esi
-; X64-AVX2-NEXT: movl $127, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length127_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+63(%rip), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length127_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length127_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $.L.str, %esi
-; X64-MIC-AVX2-NEXT: movl $127, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length127_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length128(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length128:
-; X64: # %bb.0:
-; X64-NEXT: movl $128, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
- ret i32 %m
-}
-
-define i1 @length128_eq(ptr %x, ptr %y) nounwind {
-; X64-SSE-LABEL: length128_eq:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $128, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: setne %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length128_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $128, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length128_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $128, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length128_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: setne %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length128_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: setne %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length128_eq:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $128, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: setne %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length128_eq:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: setne %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length128_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $128, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length128_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $128, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length128_eq_const(ptr %X) nounwind {
-; X64-SSE-LABEL: length128_eq_const:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: movl $.L.str, %esi
-; X64-SSE-NEXT: movl $128, %edx
-; X64-SSE-NEXT: callq memcmp
-; X64-SSE-NEXT: testl %eax, %eax
-; X64-SSE-NEXT: sete %al
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-;
-; X64-AVX1-LABEL: length128_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $128, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length128_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rax
-; X64-AVX2-NEXT: movl $.L.str, %esi
-; X64-AVX2-NEXT: movl $128, %edx
-; X64-AVX2-NEXT: callq memcmp
-; X64-AVX2-NEXT: testl %eax, %eax
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: popq %rcx
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512BW-LABEL: length128_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
-; X64-AVX512BW-NEXT: sete %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512F-LABEL: length128_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
-; X64-AVX512F-NEXT: sete %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
-;
-; X64-MIC-AVX2-LABEL: length128_eq_const:
-; X64-MIC-AVX2: # %bb.0:
-; X64-MIC-AVX2-NEXT: pushq %rax
-; X64-MIC-AVX2-NEXT: movl $.L.str, %esi
-; X64-MIC-AVX2-NEXT: movl $128, %edx
-; X64-MIC-AVX2-NEXT: callq memcmp
-; X64-MIC-AVX2-NEXT: testl %eax, %eax
-; X64-MIC-AVX2-NEXT: sete %al
-; X64-MIC-AVX2-NEXT: popq %rcx
-; X64-MIC-AVX2-NEXT: retq
-;
-; X64-MIC-AVX512F-LABEL: length128_eq_const:
-; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
-; X64-MIC-AVX512F-NEXT: sete %al
-; X64-MIC-AVX512F-NEXT: vzeroupper
-; X64-MIC-AVX512F-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length192(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length192:
-; X64: # %bb.0:
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
- ret i32 %m
-}
-
-define i1 @length192_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length192_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length192_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length192_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length192_eq_const(ptr %X) nounwind {
-; X64-LABEL: length192_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $192, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length255(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length255:
-; X64: # %bb.0:
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
- ret i32 %m
-}
-
-define i1 @length255_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length255_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length255_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length255_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length255_eq_const(ptr %X) nounwind {
-; X64-LABEL: length255_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $255, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length256(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length256:
-; X64: # %bb.0:
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
- ret i32 %m
-}
-
-define i1 @length256_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length256_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length256_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length256_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length256_eq_const(ptr %X) nounwind {
-; X64-LABEL: length256_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $256, %edx # imm = 0x100
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length384(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length384:
-; X64: # %bb.0:
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind
- ret i32 %m
-}
-
-define i1 @length384_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length384_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length384_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length384_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length384_eq_const(ptr %X) nounwind {
-; X64-LABEL: length384_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $384, %edx # imm = 0x180
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length511(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length511:
-; X64: # %bb.0:
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind
- ret i32 %m
-}
-
-define i1 @length511_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length511_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length511_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length511_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length511_eq_const(ptr %X) nounwind {
-; X64-LABEL: length511_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $511, %edx # imm = 0x1FF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-define i32 @length512(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: length512:
-; X64: # %bb.0:
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind
- ret i32 %m
-}
-
-define i1 @length512_eq(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length512_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_lt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length512_lt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_gt(ptr %x, ptr %y) nounwind {
-; X64-LABEL: length512_gt:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setg %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @length512_eq_const(ptr %X) nounwind {
-; X64-LABEL: length512_eq_const:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $512, %edx # imm = 0x200
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks that we do not do stupid things with huge sizes.
-define i32 @huge_length(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: huge_length:
-; X64: # %bb.0:
-; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
- ret i32 %m
-}
-
-define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
-; X64-LABEL: huge_length_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
-
-; This checks non-constant sizes.
-define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind {
-; X64-LABEL: nonconst_length:
-; X64: # %bb.0:
-; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
- ret i32 %m
-}
-
-define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind {
-; X64-LABEL: nonconst_length_eq:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
- %c = icmp eq i32 %m, 0
- ret i1 %c
-}
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index fb8d2335b34106..e9eddf35f7403c 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -19,8 +19,8 @@
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
-; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
+; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
@@ -42,13 +42,6 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
-; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
-; CHECK-NEXT: Merge contiguous icmps into a memcmp
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: Lazy Branch Probability Analysis
-; CHECK-NEXT: Lazy Block Frequency Analysis
-; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index ecdb5a5e010d92..ce13b2eb52a7ef 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -142,10 +142,12 @@
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf839..d6f09a85953c14 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -81,10 +81,12 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434ac..cc3939c5bdcf7b 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -69,10 +69,12 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index ac80a31d8fd4bc..bf354c91d15f37 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -77,10 +77,12 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 6486639e07b49c..9c5f9fd281ee7c 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -112,10 +112,12 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index 09f9f0f48baddb..92ab5b6bbc74ad 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -102,17 +102,23 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running analysis: BasicAA on foo
+; CHECK-O-NEXT: Running analysis: AssumptionAnalysis on foo
+; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo
+; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo
+; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
+; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
+; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
+; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
+; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SROAPass
-; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
-; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
-; CHECK-O-NEXT: Running analysis: BasicAA
-; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
-; CHECK-O-NEXT: Running analysis: TypeBasedAA
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
@@ -120,10 +126,6 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
-; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
-; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
-; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
-; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 47bdbfd2d357d4..b565e80ac05e90 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -81,10 +81,12 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
+; CHECK-O-NEXT: Running analysis: AAManager on foo
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll
new file mode 100644
index 00000000000000..18141e72007f7a
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll
@@ -0,0 +1,751 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+declare i32 @bcmp(ptr, ptr, i64)
+
+define i1 @bcmp0(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp0(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: ret i1 true
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 0)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp1(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp1(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 1)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp2(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp2(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 2)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; or (and (xor a, b), C1), (and (xor c, d), C2)
+define i1 @bcmp3(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp3(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 2
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp4(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp4(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 4)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; or (xor a, b), (and (xor c, d), C2)
+define i1 @bcmp5(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp5(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; or (xor a, b), (and (xor c, d), C2)
+define i1 @bcmp6(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp6(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 6)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; or (xor a, b), (xor c, d)
+define i1 @bcmp7(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp7(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 7)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp8(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp8(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 8)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; or (xor a, b), (and (xor c, d), C2)
+define i1 @bcmp9(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp9(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp10(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp10(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp11(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp11(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 11)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp12(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp12(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp13(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp13(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 5
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 5
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 13)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp14(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp14(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 6
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 6
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 14)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp15(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp15(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 7
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 15)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp16(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp16(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp20(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp20(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i64 [[TMP17]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP19]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp24(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp24(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP13]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP17]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp28(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp28(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
+; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP13]], [[TMP20]]
+; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP23]], 0
+; CHECK-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP25]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp33(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp33(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = zext i8 [[TMP21]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[TMP23]], [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP28]], [[TMP25]]
+; CHECK-NEXT: [[TMP30:%.*]] = icmp ne i64 [[TMP29]], 0
+; CHECK-NEXT: [[TMP31:%.*]] = zext i1 [[TMP30]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP31]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+define i1 @bcmp38(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp38(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 30
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 30
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP26]], [[TMP23]]
+; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0
+; CHECK-NEXT: [[TMP29:%.*]] = zext i1 [[TMP28]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP29]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; bcmp of 45 bytes whose result is only tested for equality with zero.
+; The CHECK lines expect six pairs of i64 loads at offsets 0, 8, 16, 24, 32
+; and 37 (the last pair overlaps the previous one so that exactly 45 bytes are
+; covered), xor'ed pairwise and or-reduced into one compare against zero.
+define i1 @bcmp45(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp45(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[A]], i64 37
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 37
+; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP23]], [[TMP28]]
+; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP32]], [[TMP31]]
+; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0
+; CHECK-NEXT: [[TMP35:%.*]] = zext i1 [[TMP34]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP35]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; bcmp of 64 bytes: eight i64 load pairs, xor'ed pairwise and or-reduced.
+; Although such a large compare chain may not be profitable on high-end CPUs,
+; we believe it is better on most CPUs, so the transform is performed now.
+; 8 xor + 7 or + 1 cmp need only 6 cycles on a machine with 4 ALU ports:
+;   2 cycles for the xors
+;   3 cycles for the ors
+;   1 cycle for the cmp
+define i1 @bcmp64(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp64(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[A]], i64 40
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 40
+; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[A]], i64 48
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 48
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = xor i64 [[TMP31]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[A]], i64 56
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[B]], i64 56
+; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP34]], align 1
+; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = xor i64 [[TMP36]], [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP23]], [[TMP28]]
+; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP33]], [[TMP38]]
+; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP39]], [[TMP40]]
+; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP41]], [[TMP42]]
+; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP43]], [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i64 [[TMP45]], 0
+; CHECK-NEXT: [[TMP47:%.*]] = zext i1 [[TMP46]] to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP47]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; bcmp of 89 bytes: the CHECK lines expect the libcall to be left in place,
+; i.e. no inline expansion is performed at this size.
+; NOTE(review): presumably 89 bytes exceeds the target's expansion limit --
+; confirm against the AArch64 memcmp expansion options.
+define i1 @bcmp89(ptr %a, ptr %b) {
+; CHECK-LABEL: define i1 @bcmp89(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[CR:%.*]] = call i32 @bcmp(ptr [[A]], ptr [[B]], i64 89)
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CR]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cr = call i32 @bcmp(ptr %a, ptr %b, i64 89)
+ %r = icmp eq i32 %cr, 0
+ ret i1 %r
+}
+
+; No bcmp call here: the input is already an xor/zext/or/icmp-eq chain that
+; mixes i32 and i8 operands. The CHECK lines expect the IR to be unchanged.
+define i1 @bcmp_zext(i32 %0, i32 %1, i8 %2, i8 %3) {
+; CHECK-LABEL: define i1 @bcmp_zext(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i8 [[TMP2:%.*]], i8 [[TMP3:%.*]]) {
+; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: ret i1 [[TMP9]]
+;
+ %5 = xor i32 %1, %0
+ %6 = xor i8 %3, %2
+ %7 = zext i8 %6 to i32
+ %8 = or i32 %5, %7
+ %9 = icmp eq i32 %8, 0
+ ret i1 %9
+}
+
+; No bcmp call here: three i8 xors or-reduced and compared equal to zero.
+; The CHECK lines expect the IR to be unchanged.
+define i1 @bcmp_i8(i8 %a0, i8 %b0, i8 %a1, i8 %b1, i8 %a2, i8 %b2) {
+; CHECK-LABEL: define i1 @bcmp_i8(
+; CHECK-SAME: i8 [[A0:%.*]], i8 [[B0:%.*]], i8 [[A1:%.*]], i8 [[B1:%.*]], i8 [[A2:%.*]], i8 [[B2:%.*]]) {
+; CHECK-NEXT: [[XOR0:%.*]] = xor i8 [[B0]], [[A0]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B1]], [[A1]]
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[OR0:%.*]] = or i8 [[XOR0]], [[XOR1]]
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[OR0]], [[XOR2]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[OR1]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xor0 = xor i8 %b0, %a0
+ %xor1 = xor i8 %b1, %a1
+ %xor2 = xor i8 %b2, %a2
+ %or0 = or i8 %xor0, %xor1
+ %or1 = or i8 %or0, %xor2
+ %r = icmp eq i8 %or1, 0
+ ret i1 %r
+}
+
+; No bcmp call here: three i16 xors or-reduced and compared equal to zero.
+; The CHECK lines expect the IR to be unchanged.
+define i1 @bcmp_i16(i16 %a0, i16 %b0, i16 %a1, i16 %b1, i16 %a2, i16 %b2) {
+; CHECK-LABEL: define i1 @bcmp_i16(
+; CHECK-SAME: i16 [[A0:%.*]], i16 [[B0:%.*]], i16 [[A1:%.*]], i16 [[B1:%.*]], i16 [[A2:%.*]], i16 [[B2:%.*]]) {
+; CHECK-NEXT: [[XOR0:%.*]] = xor i16 [[B0]], [[A0]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i16 [[B1]], [[A1]]
+; CHECK-NEXT: [[XOR2:%.*]] = xor i16 [[B2]], [[A2]]
+; CHECK-NEXT: [[OR0:%.*]] = or i16 [[XOR0]], [[XOR1]]
+; CHECK-NEXT: [[OR1:%.*]] = or i16 [[OR0]], [[XOR2]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[OR1]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xor0 = xor i16 %b0, %a0
+ %xor1 = xor i16 %b1, %a1
+ %xor2 = xor i16 %b2, %a2
+ %or0 = or i16 %xor0, %xor1
+ %or1 = or i16 %or0, %xor2
+ %r = icmp eq i16 %or1, 0
+ ret i1 %r
+}
+
+; No bcmp call here: three i128 xors or-reduced and compared against zero.
+; Unlike the i8/i16 variants this one uses `icmp ne`. The CHECK lines expect
+; the IR to be unchanged.
+define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) {
+; CHECK-LABEL: define i1 @bcmp_i128(
+; CHECK-SAME: i128 [[A0:%.*]], i128 [[B0:%.*]], i128 [[A1:%.*]], i128 [[B1:%.*]], i128 [[A2:%.*]], i128 [[B2:%.*]]) {
+; CHECK-NEXT: [[XOR0:%.*]] = xor i128 [[B0]], [[A0]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i128 [[B1]], [[A1]]
+; CHECK-NEXT: [[XOR2:%.*]] = xor i128 [[B2]], [[A2]]
+; CHECK-NEXT: [[OR0:%.*]] = or i128 [[XOR0]], [[XOR1]]
+; CHECK-NEXT: [[OR1:%.*]] = or i128 [[OR0]], [[XOR2]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i128 [[OR1]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xor0 = xor i128 %b0, %a0
+ %xor1 = xor i128 %b1, %a1
+ %xor2 = xor i128 %b2, %a2
+ %or0 = or i128 %xor0, %xor1
+ %or1 = or i128 %or0, %xor2
+ %r = icmp ne i128 %or1, 0
+ ret i1 %r
+}
+
+; No bcmp call here: three xors on the non-power-of-two type i42, or-reduced
+; and compared with `icmp ne` against zero. The CHECK lines expect the IR to
+; be unchanged.
+define i1 @bcmp_i42(i42 %a0, i42 %b0, i42 %a1, i42 %b1, i42 %a2, i42 %b2) {
+; CHECK-LABEL: define i1 @bcmp_i42(
+; CHECK-SAME: i42 [[A0:%.*]], i42 [[B0:%.*]], i42 [[A1:%.*]], i42 [[B1:%.*]], i42 [[A2:%.*]], i42 [[B2:%.*]]) {
+; CHECK-NEXT: [[XOR0:%.*]] = xor i42 [[B0]], [[A0]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i42 [[B1]], [[A1]]
+; CHECK-NEXT: [[XOR2:%.*]] = xor i42 [[B2]], [[A2]]
+; CHECK-NEXT: [[OR0:%.*]] = or i42 [[XOR0]], [[XOR1]]
+; CHECK-NEXT: [[OR1:%.*]] = or i42 [[OR0]], [[XOR2]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i42 [[OR1]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xor0 = xor i42 %b0, %a0
+ %xor1 = xor i42 %b1, %a1
+ %xor2 = xor i42 %b2, %a2
+ %or0 = or i42 %xor0, %xor1
+ %or1 = or i42 %or0, %xor2
+ %r = icmp ne i42 %or1, 0
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll
new file mode 100644
index 00000000000000..e9573816c97880
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll
@@ -0,0 +1,3434 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+ at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i64)
+
+; memcmp with length 0: the CHECK lines expect the call to be replaced by the
+; constant result 0.
+define i32 @length0(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length0(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 0
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ ret i32 %m
+ }
+
+; memcmp with length 0 compared equal to zero: expected to fold to `true`.
+define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length0_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret i1 true
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp with length 0 tested for a negative result: expected to fold to
+; `false`.
+define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length0_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret i1 false
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 2 bytes: expected to become two i16 loads that are
+; byte-swapped, zero-extended to i32 and subtracted.
+define i32 @length2(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length2(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+; Three-way memcmp of 2 bytes against the constant string at @.str + 1.
+; The CHECK lines expect only %X to be loaded; the constant side appears as
+; the immediate 12594 (0x3132, the bytes '1','2' after the byte swap).
+; %Y is unused.
+define i32 @length2_const(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length2_const(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ ret i32 %m
+}
+
+; Same 2-byte compare against @.str + 1 as the three-way constant case, but
+; the result is tested with `icmp sgt ..., 0`; the CHECK lines keep that
+; signed compare on top of the expanded subtraction. %Y is unused.
+define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length2_gt_const(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; 2-byte memcmp used only for equality: expected to become two i16 loads and
+; an `icmp ne` (no byte swap), zero-extended and then compared equal to 0 by
+; the original user.
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length2_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; 2-byte memcmp tested for a negative result: the expansion is the same
+; bswap/zext/sub sequence as the plain three-way case, followed by the
+; original `icmp slt ..., 0`.
+define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length2_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; 2-byte memcmp tested for a positive result: the expansion is the
+; bswap/zext/sub sequence followed by the original `icmp sgt ..., 0`.
+define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length2_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; 2-byte memcmp against @.str + 1 used as a zero/non-zero test: expected to
+; become one i16 load compared with the immediate 12849 (0x3231, no byte
+; swap). The zext in the CHECK lines is left without a user.
+; NOTE(review): despite the `_eq` name, the input tests the result with
+; `icmp ne`, so the function returns true when the buffers differ.
+define i1 @length2_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length2_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; The memcmp call site carries `nobuiltin`: the CHECK lines expect the call
+; to be kept as a call rather than expanded inline.
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 3 bytes: expected to use a single i24 load per operand,
+; zero-extended to i32 and byte-swapped, with the result formed as
+; zext(ugt) - zext(ult).
+define i32 @length3(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length3(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i24, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i24, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i24 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i24 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+; 3-byte memcmp used as a zero/non-zero test: expected to become an i16 load
+; pair at offset 0 plus an i8 load pair at offset 2 (widened to i16), xor'ed
+; and or'ed into one `icmp ne`. The trailing zext is left without a user.
+; NOTE(review): despite the `_eq` name, the input tests the result with
+; `icmp ne`.
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length3_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 4 bytes: expected to become byte-swapped i32 loads with
+; the result formed as zext(ugt) - zext(ult).
+define i32 @length4(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length4(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+; 4-byte memcmp used as a zero/non-zero test: expected to become two i32 loads
+; and one `icmp ne`, which is returned directly; the zext has no user.
+; NOTE(review): despite the `_eq` name, the input tests the result with
+; `icmp ne`.
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length4_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; 4-byte memcmp tested for a negative result: the CHECK lines expect the
+; whole sequence to reduce to a single `icmp ult` on the byte-swapped loads.
+define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length4_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; 4-byte memcmp whose sign bit is extracted with `lshr ..., 31`: the CHECK
+; lines expect this to reduce to zext(icmp ult) on the byte-swapped loads.
+define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length4_lt_32(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = lshr i32 %m, 31
+ ret i32 %c
+}
+
+; 4-byte memcmp tested for a positive result: the CHECK lines expect the
+; whole sequence to reduce to a single `icmp ugt` on the byte-swapped loads.
+define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length4_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; 4-byte equality compare against @.str + 1: expected to become one i32 load
+; compared with the immediate 875770417 (0x34333231, no byte swap), followed
+; by the original `icmp eq ..., 0`.
+define i1 @length4_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length4_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 5 bytes: expected to use a single i40 load per operand,
+; zero-extended to i64 and byte-swapped, with the result formed as
+; zext(ugt) - zext(ult).
+define i32 @length5(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length5(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i40, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i40, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i40 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i40 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+; 5-byte memcmp used as a zero/non-zero test: expected to become an i32 load
+; pair at offset 0 plus an i8 load pair at offset 4 (widened to i32), xor'ed
+; and or'ed into one `icmp ne`. The trailing zext is left without a user.
+; NOTE(review): despite the `_eq` name, the input tests the result with
+; `icmp ne`.
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length5_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; 5-byte memcmp tested for a negative result: expected to reduce to a single
+; `icmp ult` on the i40 loads after zero-extension to i64 and byte swap.
+define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length5_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i40, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i40, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i40 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i40 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 6 bytes: expected to use a single i48 load per operand,
+; zero-extended to i64 and byte-swapped, with the result formed as
+; zext(ugt) - zext(ult).
+define i32 @length6(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length6(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i48, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i48, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i48 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i48 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
+ ret i32 %m
+}
+
+; 6-byte memcmp whose sign bit is extracted with `lshr ..., 31`: expected to
+; reduce to zext(icmp ult) on the i48 loads after zero-extension to i64 and
+; byte swap.
+define i32 @length6_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length6_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i48, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i48, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i48 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i48 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: ret i32 [[TMP8]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
+ %r = lshr i32 %m, 31
+ ret i32 %r
+}
+
+; Three-way memcmp of 7 bytes: expected to expand into a multi-block form.
+; `loadbb` compares byte-swapped i32 loads at offset 0, `loadbb1` compares
+; overlapping i32 loads at offset 3, `res_block` turns the first mismatch
+; into -1 or 1, and `endblock` merges that with 0 for the all-equal path.
+define i32 @length7(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length7(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ ret i32 %m
+}
+
+; 7-byte memcmp tested for a negative result: expected to use the same
+; multi-block expansion as the plain three-way case (i32 loads at offsets 0
+; and 3), with the original `icmp slt ..., 0` applied to the merged phi.
+define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length7_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; 7-byte memcmp used as a zero/non-zero test: expected to become two
+; overlapping i32 load pairs at offsets 0 and 3, xor'ed and or'ed into one
+; `icmp ne`, with no branches. The trailing zext is left without a user.
+; NOTE(review): despite the `_eq` name, the input tests the result with
+; `icmp ne`.
+define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length7_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: ret i1 [[TMP10]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 8 bytes: expected to become byte-swapped i64 loads with
+; the result formed as zext(ugt) - zext(ult).
+define i32 @length8(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length8(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length8_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length8_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length9(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length9(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
+ ret i32 %m
+}
+
+define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length9_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length10(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length10(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]])
+; CHECK-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]])
+; CHECK-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i64
+; CHECK-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
+ ret i32 %m
+}
+
+define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length10_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length11(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length11(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
+ ret i32 %m
+}
+
+define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length11_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length12_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length12(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; CHECK-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length13_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length14_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length15(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ ret i32 %m
+}
+
+define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length15_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15_const(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length15_const(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; CHECK-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; CHECK-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
+ ret i32 %m
+}
+
+define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length15_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length15_gt_const(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; CHECK-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; CHECK-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+
+define i32 @length16(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length16(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length16_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: ret i1 [[TMP10]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length16_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length16_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length16_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+
+define i32 @length24(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length24(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length24_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP13]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length24_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length24_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length24_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; CHECK-NEXT: ret i1 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length31(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length31(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
+ ret i32 %m
+}
+
+define i1 @length31_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length31_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length31_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length31_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; CHECK-LABEL: define i1 @length31_eq_prefer128(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length31_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3474870397276861491
+; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; CHECK-NEXT: ret i1 [[TMP15]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length32(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length32_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length32_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length32_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; CHECK-LABEL: define i1 @length32_eq_prefer128(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length32_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604
+; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; CHECK-NEXT: ret i1 [[TMP15]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length48(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length48(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB5]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length48_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP23]], [[TMP28]]
+; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP32]], [[TMP31]]
+; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0
+; CHECK-NEXT: [[TMP35:%.*]] = zext i1 [[TMP34]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP35]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length48_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB5]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length48_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB5]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; CHECK-LABEL: define i1 @length48_eq_prefer128(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP23]], [[TMP28]]
+; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP32]], [[TMP31]]
+; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0
+; CHECK-NEXT: [[TMP35:%.*]] = zext i1 [[TMP34]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP35]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length48_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 4123106164818064178
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 3978425819141910832
+; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP14]], [[TMP17]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP21]], [[TMP20]]
+; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = zext i1 [[TMP23]] to i32
+; CHECK-NEXT: ret i1 [[TMP23]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length63(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length63(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]]
+; CHECK: loadbb6:
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1
+; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]])
+; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]])
+; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]]
+; CHECK: loadbb7:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 55
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 55
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]])
+; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]])
+; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
+ ret i32 %m
+}
+
+define i1 @length63_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length63_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = xor i64 [[TMP31]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[X]], i64 55
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[Y]], i64 55
+; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP34]], align 1
+; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = xor i64 [[TMP36]], [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP23]], [[TMP28]]
+; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP33]], [[TMP38]]
+; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP39]], [[TMP40]]
+; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP41]], [[TMP42]]
+; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP43]], [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i64 [[TMP45]], 0
+; CHECK-NEXT: [[TMP47:%.*]] = zext i1 [[TMP46]] to i32
+; CHECK-NEXT: ret i1 [[TMP46]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length63_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]]
+; CHECK: loadbb6:
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1
+; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]])
+; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]])
+; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]]
+; CHECK: loadbb7:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 55
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 55
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]])
+; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]])
+; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length63_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]]
+; CHECK: loadbb6:
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1
+; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]])
+; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]])
+; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]]
+; CHECK: loadbb7:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 55
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 55
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]])
+; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]])
+; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length63_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 4123106164818064178
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 3978425819141910832
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP19]], 3833745473465760056
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[X]], i64 55
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 3616724998069630517
+; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP14]], [[TMP17]]
+; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP20]], [[TMP23]]
+; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP28]], [[TMP29]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = zext i1 [[TMP31]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP32]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length64(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]]
+; CHECK: loadbb6:
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1
+; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]])
+; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]])
+; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]]
+; CHECK: loadbb7:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 56
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 56
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]])
+; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]])
+; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length64_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = xor i64 [[TMP31]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[X]], i64 56
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[Y]], i64 56
+; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP34]], align 1
+; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = xor i64 [[TMP36]], [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP23]], [[TMP28]]
+; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP33]], [[TMP38]]
+; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP39]], [[TMP40]]
+; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP41]], [[TMP42]]
+; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP43]], [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i64 [[TMP45]], 0
+; CHECK-NEXT: [[TMP47:%.*]] = zext i1 [[TMP46]] to i32
+; CHECK-NEXT: ret i1 [[TMP46]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length64_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]]
+; CHECK: loadbb6:
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1
+; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]])
+; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]])
+; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]]
+; CHECK: loadbb7:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 56
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 56
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]])
+; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]])
+; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length64_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]]
+; CHECK: loadbb4:
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]])
+; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]])
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]]
+; CHECK: loadbb5:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40
+; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1
+; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]])
+; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]])
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]]
+; CHECK: loadbb6:
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1
+; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]])
+; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]])
+; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]]
+; CHECK: loadbb7:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 56
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 56
+; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]])
+; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]])
+; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length64_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 4123106164818064178
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 40
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 3978425819141910832
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP19]], 3833745473465760056
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[X]], i64 56
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 1
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 3689065127958034230
+; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP14]], [[TMP17]]
+; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP20]], [[TMP23]]
+; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP28]], [[TMP29]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = zext i1 [[TMP31]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP32]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length96(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length96(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
+ ret i32 %m
+}
+
+define i1 @length96_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length96_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length96_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length96_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length96_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length127(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length127(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
+ ret i32 %m
+}
+
+define i1 @length127_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length127_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length127_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length127_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length127_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length128(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length128(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
+ ret i32 %m
+}
+
+define i1 @length128_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length128_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length128_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length128_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length128_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length192(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length192_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length192_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length192_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length192_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length255(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length255_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length255_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length255_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length255_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length256(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length256_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length256_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length256_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length256_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length384(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length384(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind
+ ret i32 %m
+}
+
+define i1 @length384_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length384_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length384_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length384_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length384_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length511(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length511(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind
+ ret i32 %m
+}
+
+define i1 @length511_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length511_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length511_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length511_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length511_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length512(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @length512(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind
+ ret i32 %m
+}
+
+define i1 @length512_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length512_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length512_lt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: define i1 @length512_gt(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: define i1 @length512_eq_const(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @huge_length(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i32 @huge_length(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
+ ret i32 %m
+}
+
+define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @huge_length_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind {
+; CHECK-LABEL: define i32 @nonconst_length(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
+ ret i32 %m
+}
+
+define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind {
+; CHECK-LABEL: define i1 @nonconst_length_eq(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
index 92439691e1873c..735fb27da16060 100644
--- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
@@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
declare i32 @memcmp(ptr nocapture, ptr nocapture, i64)
diff --git a/llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg b/llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg
new file mode 100644
index 00000000000000..d1828f2b613d9e
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg
@@ -0,0 +1,4 @@
+if not "BPF" in config.root.targets:
+ config.unsupported = True
+if "system-aix" in config.available_features:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll
new file mode 100644
index 00000000000000..1accfe88d1a82a
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=bpf < %s | FileCheck %s --check-prefix=BPF
+; RUN: opt -S -passes=expand-memcmp -mtriple=bpf -mcpu=v3 < %s | FileCheck %s --check-prefix=BPF-V3
+;
+; Source code:
+; /* set aligned 4 to minimize the number of loads */
+; struct build_id {
+; unsigned char id[20];
+; } __attribute__((aligned(4)));
+;
+; /* try to compute a local build_id */
+; void bar1(ptr);
+;
+; /* the global build_id to compare */
+; struct build_id id2;
+;
+; int foo()
+; {
+; struct build_id id1;
+;
+; bar1(&id1);
+; return __builtin_memcmp(&id1, &id2, sizeof(id1)) == 0;
+; }
+; Compilation flags:
+; clang -target bpf -S -O2 t.c -emit-llvm
+
+%struct.build_id = type { [20 x i8] }
+
+@id2 = dso_local global %struct.build_id zeroinitializer, align 4
+
+; Function Attrs: noinline nounwind
+define dso_local i32 @foo() #0 {
+; BPF-LABEL: define dso_local i32 @foo(
+; BPF-SAME: ) #[[ATTR0:[0-9]+]] {
+; BPF-NEXT: entry:
+; BPF-NEXT: [[ID1:%.*]] = alloca [[STRUCT_BUILD_ID:%.*]], align 4
+; BPF-NEXT: call void @bar1(ptr noundef [[ID1]])
+; BPF-NEXT: br label [[LOADBB:%.*]]
+; BPF: res_block:
+; BPF-NEXT: br label [[ENDBLOCK:%.*]]
+; BPF: loadbb:
+; BPF-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID1]], align 4
+; BPF-NEXT: [[TMP1:%.*]] = load i64, ptr @id2, align 4
+; BPF-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], [[TMP1]]
+; BPF-NEXT: br i1 [[TMP2]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; BPF: loadbb1:
+; BPF-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[ID1]], i64 8
+; BPF-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 4
+; BPF-NEXT: [[TMP5:%.*]] = load i64, ptr getelementptr (i8, ptr @id2, i64 8), align 4
+; BPF-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]]
+; BPF-NEXT: br i1 [[TMP6]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; BPF: loadbb2:
+; BPF-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ID1]], i64 16
+; BPF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+; BPF-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr (i8, ptr @id2, i64 16), align 4
+; BPF-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP8]], [[TMP9]]
+; BPF-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; BPF: endblock:
+; BPF-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; BPF-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; BPF-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; BPF-NEXT: ret i32 [[CONV]]
+;
+; BPF-V3-LABEL: define dso_local i32 @foo(
+; BPF-V3-SAME: ) #[[ATTR0:[0-9]+]] {
+; BPF-V3-NEXT: entry:
+; BPF-V3-NEXT: [[ID1:%.*]] = alloca [[STRUCT_BUILD_ID:%.*]], align 4
+; BPF-V3-NEXT: call void @bar1(ptr noundef [[ID1]])
+; BPF-V3-NEXT: br label [[LOADBB:%.*]]
+; BPF-V3: res_block:
+; BPF-V3-NEXT: br label [[ENDBLOCK:%.*]]
+; BPF-V3: loadbb:
+; BPF-V3-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID1]], align 4
+; BPF-V3-NEXT: [[TMP1:%.*]] = load i64, ptr @id2, align 4
+; BPF-V3-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], [[TMP1]]
+; BPF-V3-NEXT: br i1 [[TMP2]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; BPF-V3: loadbb1:
+; BPF-V3-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[ID1]], i64 8
+; BPF-V3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 4
+; BPF-V3-NEXT: [[TMP5:%.*]] = load i64, ptr getelementptr (i8, ptr @id2, i64 8), align 4
+; BPF-V3-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]]
+; BPF-V3-NEXT: br i1 [[TMP6]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; BPF-V3: loadbb2:
+; BPF-V3-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ID1]], i64 16
+; BPF-V3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+; BPF-V3-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr (i8, ptr @id2, i64 16), align 4
+; BPF-V3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP8]], [[TMP9]]
+; BPF-V3-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; BPF-V3: endblock:
+; BPF-V3-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; BPF-V3-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; BPF-V3-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; BPF-V3-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %id1 = alloca %struct.build_id, align 4
+ call void @bar1(ptr noundef %id1)
+ %call = call i32 @memcmp(ptr noundef %id1, ptr noundef @id2, i64 noundef 20) #3
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+declare dso_local void @bar1(ptr noundef) #1
+
+; Function Attrs: nounwind
+declare dso_local i32 @memcmp(ptr noundef, ptr noundef, i64 noundef) #2
+
+attributes #0 = { noinline nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 18.0.0git (git@github.com:llvm/llvm-project.git a776740d6296520b8bde156aa3f8d9ecb32cddd9)"}
diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg b/llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg
new file mode 100644
index 00000000000000..bb982488eb15ee
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "PowerPC" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll
new file mode 100644
index 00000000000000..9a75b147e7e1fb
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -0,0 +1,218 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@zeroEqualityTest01.buffer1 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 4], align 4
+@zeroEqualityTest01.buffer2 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 3], align 4
+@zeroEqualityTest02.buffer1 = private unnamed_addr constant [4 x i32] [i32 4, i32 0, i32 0, i32 0], align 4
+@zeroEqualityTest02.buffer2 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 0, i32 0], align 4
+@zeroEqualityTest03.buffer1 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 3], align 4
+@zeroEqualityTest03.buffer2 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 4], align 4
+@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4
+@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4
+
+declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1
+
+; Check 4 bytes - requires 1 load for each param.
+define signext i32 @zeroEqualityTest02(ptr %x, ptr %y) {
+; CHECK-LABEL: define signext i32 @zeroEqualityTest02(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[DOT]]
+;
+ %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 4)
+ %not.cmp = icmp ne i32 %call, 0
+ %. = zext i1 %not.cmp to i32
+ ret i32 %.
+}
+
+; Check 16 bytes - requires 2 loads for each param (or use vectors?).
+define signext i32 @zeroEqualityTest01(ptr %x, ptr %y) {
+; CHECK-LABEL: define signext i32 @zeroEqualityTest01(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp ne i32 [[PHI_RES]], 0
+; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_TOBOOL]] to i32
+; CHECK-NEXT: ret i32 [[DOT]]
+;
+ %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 16)
+ %not.tobool = icmp ne i32 %call, 0
+ %. = zext i1 %not.tobool to i32
+ ret i32 %.
+}
+
+; Check 7 bytes - requires 3 loads for each param.
+define signext i32 @zeroEqualityTest03(ptr %x, ptr %y) {
+; CHECK-LABEL: define signext i32 @zeroEqualityTest03(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; CHECK-NEXT: [[NOT_LNOT:%.*]] = icmp ne i32 [[PHI_RES]], 0
+; CHECK-NEXT: [[COND:%.*]] = zext i1 [[NOT_LNOT]] to i32
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 7)
+ %not.lnot = icmp ne i32 %call, 0
+ %cond = zext i1 %not.lnot to i32
+ ret i32 %cond
+}
+
+; Validate with > 0
+define signext i32 @zeroEqualityTest04() {
+; CHECK-LABEL: define signext i32 @zeroEqualityTest04(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ 288230376151711744, [[LOADBB]] ], [ 0, [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 216172782113783808, [[LOADBB]] ], [ 0, [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: br i1 false, label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: br i1 true, label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[NOT_CMP:%.*]] = icmp slt i32 [[PHI_RES]], 1
+; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_CMP]] to i32
+; CHECK-NEXT: ret i32 [[DOT]]
+;
+ %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest02.buffer1, ptr @zeroEqualityTest02.buffer2, i64 16)
+ %not.cmp = icmp slt i32 %call, 1
+ %. = zext i1 %not.cmp to i32
+ ret i32 %.
+}
+
+; Validate with < 0
+define signext i32 @zeroEqualityTest05() {
+; CHECK-LABEL: define signext i32 @zeroEqualityTest05(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ 0, [[LOADBB]] ], [ 50331648, [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 0, [[LOADBB]] ], [ 67108864, [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: br i1 true, label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: br i1 false, label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[CALL_LOBIT:%.*]] = lshr i32 [[PHI_RES]], 31
+; CHECK-NEXT: [[CALL_LOBIT_NOT:%.*]] = xor i32 [[CALL_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[CALL_LOBIT_NOT]]
+;
+ %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest03.buffer1, ptr @zeroEqualityTest03.buffer2, i64 16)
+ %call.lobit = lshr i32 %call, 31
+ %call.lobit.not = xor i32 %call.lobit, 1
+ ret i32 %call.lobit.not
+}
+
+; Validate with the memcmp() result used as the condition of a ?: expression.
+define signext i32 @equalityFoldTwoConstants() {
+; CHECK-LABEL: define signext i32 @equalityFoldTwoConstants(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: br i1 false, label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: br i1 false, label [[RES_BLOCK]], label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; CHECK-NEXT: [[COND:%.*]] = zext i1 [[NOT_TOBOOL]] to i32
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr @zeroEqualityTest04.buffer2, i64 16)
+ %not.tobool = icmp eq i32 %call, 0
+ %cond = zext i1 %not.tobool to i32
+ ret i32 %cond
+}
+
+define signext i32 @equalityFoldOneConstant(ptr %X) {
+; CHECK-LABEL: define signext i32 @equalityFoldOneConstant(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 4294967296, [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 12884901890, [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; CHECK-NEXT: [[COND:%.*]] = zext i1 [[NOT_TOBOOL]] to i32
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr %X, i64 16)
+ %not.tobool = icmp eq i32 %call, 0
+ %cond = zext i1 %not.tobool to i32
+ ret i32 %cond
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[M:%.*]] = tail call signext i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call signext i32 @memcmp(ptr %X, ptr %Y, i64 2) nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll
new file mode 100644
index 00000000000000..ffc49478cfa4d3
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s
+
+; This tests interaction between MergeICmp and expand-memcmp.
+
+%"struct.std::pair" = type { i32, i32 }
+
+define zeroext i1 @opeq1(
+; CHECK-LABEL: define zeroext i1 @opeq1(
+; CHECK-SAME: ptr nocapture readonly dereferenceable(8) [[A:%.*]], ptr nocapture readonly dereferenceable(8) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; CHECK: land.rhs.i:
+; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", ptr [[A]], i64 0, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SECOND_I]], align 4
+; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", ptr [[B]], i64 0, i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SECOND2_I]], align 4
+; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br label [[OPEQ1_EXIT]]
+; CHECK: opeq1.exit:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ ptr nocapture readonly dereferenceable(8) %a,
+ ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %a, align 4
+ %1 = load i32, ptr %b, align 4
+ %cmp.i = icmp eq i32 %0, %1
+ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+ %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1
+ %2 = load i32, ptr %second.i, align 4
+ %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1
+ %3 = load i32, ptr %second2.i, align 4
+ %cmp3.i = icmp eq i32 %2, %3
+ br label %opeq1.exit
+
+opeq1.exit:
+ %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+ ret i1 %4
+}
+
+
diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll
new file mode 100644
index 00000000000000..21cdbd65544c4c
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s
+
+define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @memcmp8(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
+ ret i32 %call
+}
+
+define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @memcmp4(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
+ ret i32 %call
+}
+
+define signext i32 @memcmp2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @memcmp2(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 2)
+ ret i32 %call
+}
+
+define signext i32 @memcmp1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @memcmp1(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 1) #2
+ ret i32 %call
+}
+
+declare signext i32 @memcmp(ptr, ptr, i64)
diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll
new file mode 100644
index 00000000000000..3ad0c9d12ea0bc
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s
+
+define signext i32 @test1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @test1(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP11:%.*]], [[LOADBB1:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11]] = call i64 @llvm.bswap.i64(i64 [[TMP9]])
+; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP1]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+entry:
+
+
+
+ ; CHECK-BE-LABEL: @test1(
+ ; CHECK-BE-LABEL: res_block:{{.*}}
+ ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
+ ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
+ ; CHECK-BE-NEXT: br label %endblock
+
+ ; CHECK-BE-LABEL: loadbb:{{.*}}
+ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
+
+ ; CHECK-BE-LABEL: loadbb1:{{.*}}
+ ; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8
+ ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8
+ ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]]
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]]
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
+
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16)
+ ret i32 %call
+}
+
+declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1
+
+define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @test2(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP8]]
+;
+
+ ; CHECK-BE-LABEL: @test2(
+ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
+ ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
+ ; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
+ ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
+ ; CHECK-BE-NEXT: ret i32 [[SUB]]
+
+entry:
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
+ ret i32 %call
+}
+
+define signext i32 @test3(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @test3(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOADBB:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1:%.*]] ], [ [[TMP22:%.*]], [[LOADBB2:%.*]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1]] ], [ [[TMP23:%.*]], [[LOADBB2]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[BUFFER1]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[BUFFER2]], align 1
+; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP9]])
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[LOADBB2]], label [[RES_BLOCK]]
+; CHECK: loadbb2:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 12
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 12
+; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 1
+; CHECK-NEXT: [[TMP20:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP19]])
+; CHECK-NEXT: [[TMP22]] = zext i16 [[TMP20]] to i64
+; CHECK-NEXT: [[TMP23]] = zext i16 [[TMP21]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: br i1 [[TMP24]], label [[LOADBB3:%.*]], label [[RES_BLOCK]]
+; CHECK: loadbb3:
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 14
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 14
+; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[TMP26]], align 1
+; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32
+; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32
+; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP30]]
+; CHECK-NEXT: br label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP31]], [[LOADBB3]] ], [ [[TMP1]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+ ; CHECK-BE-LABEL: res_block:{{.*}}
+ ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
+ ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
+ ; CHECK-BE-NEXT: br label %endblock
+
+ ; CHECK-BE-LABEL: loadbb:{{.*}}
+ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
+
+ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
+ ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
+ ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block
+
+ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, ptr
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, ptr
+ ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64
+ ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block
+
+ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, ptr
+ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, ptr
+ ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
+ ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
+ ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-BE-NEXT: br label %endblock
+
+entry:
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 15)
+ ret i32 %call
+}
+ ; CHECK-BE: call = tail call signext i32 @memcmp
+define signext i32 @test4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) {
+; CHECK-LABEL: define signext i32 @test4(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call signext i32 @memcmp(ptr [[BUFFER1]], ptr [[BUFFER2]], i64 65)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+entry:
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 65)
+ ret i32 %call
+}
+
+define signext i32 @test5(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2, i32 signext %SIZE) {
+; CHECK-LABEL: define signext i32 @test5(
+; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]], i32 signext [[SIZE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SIZE]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = tail call signext i32 @memcmp(ptr [[BUFFER1]], ptr [[BUFFER2]], i64 [[CONV]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ ; CHECK-BE: call = tail call signext i32 @memcmp
+entry:
+ %conv = sext i32 %SIZE to i64
+ %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 %conv)
+ ret i32 %call
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll
index 41d357728b93e7..5877d00a818c5f 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll
@@ -1,16 +1,16 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64
declare i32 @bcmp(ptr nocapture, ptr nocapture, i64)
define i32 @bcmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @bcmp8(
-; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: ret i32 [[TMP6]]
+; X64-LABEL: define i32 @bcmp8(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i32 [[TMP4]]
;
%call = tail call i32 @bcmp(ptr %x, ptr %y, i64 8)
ret i32 %call
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll
new file mode 100644
index 00000000000000..4424488a7fffb1
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll
@@ -0,0 +1,20249 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=X64
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=sse4.1 < %s | FileCheck %s --check-prefixes=X64-SSE41
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefixes=X64-AVX1
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefixes=X64-AVX2
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW-256
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F-256
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX2
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX512F
+
+; This tests the IR-level inline expansion/optimization of memcmp performed by the expand-memcmp pass (run via opt)
+; rdar://6480398
+
+ at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i64)
+
+define i32 @length0(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length0(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; X64-NEXT: ret i32 0
+;
+; X64-SSE41-LABEL: define i32 @length0(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-SSE41-NEXT: ret i32 0
+;
+; X64-AVX1-LABEL: define i32 @length0(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX1-NEXT: ret i32 0
+;
+; X64-AVX2-LABEL: define i32 @length0(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX2-NEXT: ret i32 0
+;
+; X64-AVX512BW-256-LABEL: define i32 @length0(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512BW-256-NEXT: ret i32 0
+;
+; X64-AVX512BW-LABEL: define i32 @length0(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512BW-NEXT: ret i32 0
+;
+; X64-AVX512F-256-LABEL: define i32 @length0(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512F-256-NEXT: ret i32 0
+;
+; X64-AVX512F-LABEL: define i32 @length0(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512F-NEXT: ret i32 0
+;
+; X64-MIC-AVX2-LABEL: define i32 @length0(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-MIC-AVX2-NEXT: ret i32 0
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length0(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-MIC-AVX512F-NEXT: ret i32 0
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ ret i32 %m
+ }
+
+define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length0_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: ret i1 true
+;
+; X64-SSE41-LABEL: define i1 @length0_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: ret i1 true
+;
+; X64-AVX1-LABEL: define i1 @length0_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: ret i1 true
+;
+; X64-AVX2-LABEL: define i1 @length0_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: ret i1 true
+;
+; X64-AVX512BW-256-LABEL: define i1 @length0_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: ret i1 true
+;
+; X64-AVX512BW-LABEL: define i1 @length0_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: ret i1 true
+;
+; X64-AVX512F-256-LABEL: define i1 @length0_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: ret i1 true
+;
+; X64-AVX512F-LABEL: define i1 @length0_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: ret i1 true
+;
+; X64-MIC-AVX2-LABEL: define i1 @length0_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: ret i1 true
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length0_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: ret i1 true
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length0_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: ret i1 false
+;
+; X64-SSE41-LABEL: define i1 @length0_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: ret i1 false
+;
+; X64-AVX1-LABEL: define i1 @length0_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: ret i1 false
+;
+; X64-AVX2-LABEL: define i1 @length0_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: ret i1 false
+;
+; X64-AVX512BW-256-LABEL: define i1 @length0_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: ret i1 false
+;
+; X64-AVX512BW-LABEL: define i1 @length0_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: ret i1 false
+;
+; X64-AVX512F-256-LABEL: define i1 @length0_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: ret i1 false
+;
+; X64-AVX512F-LABEL: define i1 @length0_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: ret i1 false
+;
+; X64-MIC-AVX2-LABEL: define i1 @length0_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: ret i1 false
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length0_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: ret i1 false
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length2(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
+;
+; X64-SSE41-LABEL: define i32 @length2(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i32 @length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX2-LABEL: define i32 @length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length2(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512BW-LABEL: define i32 @length2(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length2(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512F-LABEL: define i32 @length2(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: ret i32 [[TMP7]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length2(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP7]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length2(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP7]]
+;
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i32 @length2_const(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length2_const(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-NEXT: ret i32 [[TMP4]]
+;
+; X64-SSE41-LABEL: define i32 @length2_const(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-SSE41-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX1-LABEL: define i32 @length2_const(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX1-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX2-LABEL: define i32 @length2_const(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX2-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length2_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX512BW-LABEL: define i32 @length2_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512BW-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length2_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512F-256-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX512F-LABEL: define i32 @length2_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512F-NEXT: ret i32 [[TMP4]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length2_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP4]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length2_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP4]]
+;
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_gt_const(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_gt_const(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_gt_const(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_gt_const(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_gt_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_gt_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_gt_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_gt_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_gt_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_gt_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length2_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-SSE41-LABEL: define i1 @length2_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length3(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length3(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br label [[ENDBLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length3(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length3(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length3(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length3(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length3(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length3(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length3(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length3(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length3_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-SSE41-LABEL: define i1 @length3_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length3_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length3_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length3_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-LABEL: define i1 @length3_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length3_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-LABEL: define i1 @length3_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length3_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length3_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length4(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-SSE41-LABEL: define i32 @length4(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-SSE41-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length4(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length4(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length4(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-LABEL: define i32 @length4(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length4(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-LABEL: define i32 @length4(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length4(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length4(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length4_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-SSE41-LABEL: define i1 @length4_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length4_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: ret i1 [[TMP5]]
+;
+; X64-SSE41-LABEL: define i1 @length4_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX1-LABEL: define i1 @length4_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX2-LABEL: define i1 @length4_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length4_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: ret i1 [[TMP5]]
+;
+; X64-SSE41-LABEL: define i1 @length4_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX1-LABEL: define i1 @length4_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX2-LABEL: define i1 @length4_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; length4_eq_const: equality test (memcmp(...) == 0) of the 4 bytes at %X
+; against 4 constant bytes starting at offset 1 of @.str.
+; Every check prefix below expects the same expansion: the call is replaced by
+; one align-1 i32 load of %X compared with the immediate 875770417
+; (0x34333231, i.e. bytes 0x31 0x32 0x33 0x34 in little-endian order); no load
+; from @.str and no bswap remain. Only the attribute group differs (ATTR0 for
+; the base X64 prefix, ATTR1 for the others).
+; NOTE(review): presumably @.str holds the ASCII text "1234" at offset 1; its
+; definition is outside this chunk -- confirm.
+define i1 @length4_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length4_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length4_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; length5: returns the raw i32 result of a 5-byte memcmp of %X and %Y.
+; Every check prefix below expects the same expansion, split into a 4-byte
+; step and a 1-byte step:
+;   loadbb    - align-1 i32 loads of both buffers, llvm.bswap.i32 on each,
+;               then an equality test; equal -> loadbb1, different -> res_block.
+;   res_block - unsigned less-than on the byte-swapped words selects -1 or 1.
+;   loadbb1   - loads the byte at offset 4 of each buffer, zero-extends both
+;               to i32 and subtracts them.
+;   endblock  - a phi merges the two results and is returned.
+; NOTE(review): the bswap presumably moves the first byte in memory into the
+; most significant position so the unsigned compare orders like memcmp on a
+; little-endian target -- confirm.
+define i32 @length5(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length5(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length5(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br label [[ENDBLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length5(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length5(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length5(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length5(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length5(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length5(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length5(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length5(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+; length5_eq: despite the name, the IR below tests memcmp(%X, %Y, 5) != 0.
+; Every check prefix below expects the same branch-free expansion: the xor of
+; the two align-1 i32 loads is or'ed with the xor of the zero-extended bytes
+; at offset 4, and "icmp ne ..., 0" of that value is returned directly. No
+; bswap is expected here. The trailing "zext i1 ... to i32" is still matched
+; but the ret does not use it.
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length5_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-SSE41-LABEL: define i1 @length5_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length5_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length5_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length5_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-LABEL: define i1 @length5_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length5_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-LABEL: define i1 @length5_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length5_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length5_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length5_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length5_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br label [[ENDBLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length5_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length5_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length5_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length5_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length5_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length5_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length5_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length5_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; length7: memcmp of a constant 7 bytes whose i32 result is returned directly.
+; The checks below expect the call to be replaced by two overlapping 4-byte
+; chunk compares: bytes 0-3 (loadbb) and bytes 3-6 (loadbb1, via the i64 3
+; GEPs). Each chunk is loaded as i32 and byte-swapped; on the first unequal
+; chunk, res_block yields -1 or 1 from an unsigned compare, and endblock
+; yields 0 when both chunks match.
+; Every check prefix expects the same body; only the attribute group on the
+; define line differs (ATTR0 for the plain X64 prefix, ATTR1 for the rest).
+; Presumably the byte swap makes the unsigned compare agree with memcmp's
+; byte-wise ordering on a little-endian target (to be confirmed).
+define i32 @length7(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length7(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length7(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length7(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length7(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length7(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length7(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length7(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length7(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length7(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length7(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ ret i32 %m
+}
+
+define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length7_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length7_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length7_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length7_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length7_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length7_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length7_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length7_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length7_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length7_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; Test input - a 7-byte memcmp whose result is only tested for "< 0". Every
+; check prefix above expects two overlapping i32 loads (byte offsets 0 and 3),
+; byte-swapped and compared unsigned, with no remaining call to memcmp.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length7_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: ret i1 [[TMP10]]
+;
+; X64-SSE41-LABEL: define i1 @length7_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX1-LABEL: define i1 @length7_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length7_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length7_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-LABEL: define i1 @length7_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length7_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-LABEL: define i1 @length7_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length7_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length7_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+
+
+
+
+
+
+
+
+
+
+
+; Test input - a 7-byte memcmp compared against zero. NOTE(review): despite
+; the "_eq" name the predicate below is `icmp ne`, so the expected expansion
+; (xor/or of two overlapping i32 loads at byte offsets 0 and 3) ends in `icmp ne`.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length8(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-SSE41-LABEL: define i32 @length8(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-SSE41-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length8(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length8(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length8(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-LABEL: define i32 @length8(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length8(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-LABEL: define i32 @length8(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length8(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length8(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length8_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length8_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length8_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length8_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length8_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length8_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length8_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length8_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+; 8-byte memcmp whose result is only tested for equality with zero.
+; All ten check prefixes above expect one align-1 i64 load per pointer and a
+; single icmp ne; unlike the ordered @length8 test, no bswap is expected.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length8_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-SSE41-LABEL: define i1 @length8_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length8_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-LABEL: define i1 @length8_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length8_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-LABEL: define i1 @length8_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length8_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+
+
+; Inequality test of the 8 bytes at %X against the global @.str.
+; Each prefix above expects a single i64 load compared with the constant
+; 3978425819141910832 (presumably the bytes of @.str; its definition is not
+; shown here) and the icmp ne returned directly, leaving the zext unused.
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length9_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length9_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length9_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length9_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length9_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length9_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length9_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length9_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length9_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length9_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 9-byte memcmp used only for an equality-with-zero test.
+; Expected expansion, identical for every prefix above: an i64 load pair at
+; offset 0 and an i8 load pair at offset 8 (zero-extended to i64); the two
+; xor results are or'ed together and compared against zero once.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length10_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length10_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length10_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length10_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length10_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length10_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length10_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length10_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length10_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length10_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 10-byte memcmp used only for an equality-with-zero test.
+; Expected expansion, identical for every prefix above: an i64 load pair at
+; offset 0 and an i16 load pair at offset 8 (zero-extended to i64); the two
+; xor results are or'ed together and compared against zero once.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length11_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length11_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length11_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length11_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length11_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length11_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length11_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length11_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length11_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length11_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length12_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-SSE41-LABEL: define i1 @length12_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length12_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length12_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length12_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-LABEL: define i1 @length12_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length12_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-LABEL: define i1 @length12_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length12_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length12_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length12(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length12(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-SSE41-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-SSE41-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length12(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length12(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length12(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length12(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512BW-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512BW-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length12(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512F-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length12(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length12(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length12(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length13_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length13_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length13_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length13_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length13_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length13_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length13_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length13_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length13_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length13_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+; Input under test is a 13-byte memcmp whose result is only compared with zero.
+; Every check prefix above expects the same call-free expansion, namely two
+; overlapping i64 loads per pointer (byte offsets 0 and 5), XORed pairwise,
+; ORed together and tested against zero, with no call to memcmp remaining.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length14_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length14_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length14_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length14_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length14_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length14_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length14_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length14_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length14_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length14_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+; Input under test is a 14-byte memcmp whose result is only compared with zero.
+; Every check prefix above expects the same call-free expansion, namely two
+; overlapping i64 loads per pointer (byte offsets 0 and 6), XORed pairwise,
+; ORed together and tested against zero, with no call to memcmp remaining.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length15(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length15(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length15(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length15(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length15(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length15(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length15(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length15(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length15(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length15(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ ret i32 %m
+}
+
+define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length15_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length15_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length15_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length15_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length15_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length15_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length15_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length15_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length15_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length15_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; Input IR: a 15-byte memcmp whose result is only tested for being negative.
+; The assertions above expect no memcmp call to remain: each operand is read as
+; two i64 loads at byte offsets 0 and 7, byte-swapped, then compared.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 15 bytes of %X against the constant @.str + 1 (%Y is unused).
+; Every check prefix below expects the same expansion. Two overlapping 8-byte
+; loads are taken from %X at offsets 0 and 7, each is byte-swapped and compared
+; for equality against an immediate. On the first mismatch res_block picks -1 or 1
+; with an unsigned less-than compare, and endblock yields 0 when both chunks match.
+; The two immediates are presumably the big-endian encodings of the matching
+; @.str bytes (its initializer is outside this excerpt, so confirm there).
+define i32 @length15_const(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length15_const(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length15_const(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-SSE41-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-SSE41-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-SSE41-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length15_const(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX1-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX1-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length15_const(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length15_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512BW-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512BW-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length15_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512BW-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512BW-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512BW-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length15_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512F-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512F-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length15_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length15_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-MIC-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-MIC-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length15_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
+ ret i32 %m
+}
+
+; Equality-only use of a 15-byte memcmp between %X and %Y (result compared with 0).
+; Every check prefix below expects the same branch-free expansion. Two overlapping
+; i64 loads are taken from each pointer at offsets 0 and 7, each pair is XORed, the
+; two XOR results are ORed together and tested against zero. No bswap appears in
+; the expected output, and the i1 is widened to i32 before the final icmp eq 0.
+define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length15_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length15_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length15_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length15_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length15_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length15_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length15_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length15_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length15_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length15_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length15_gt_const(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length15_gt_const(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-SSE41-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-SSE41-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-SSE41-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length15_gt_const(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX1-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX1-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length15_gt_const(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length15_gt_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512BW-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512BW-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length15_gt_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512BW-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512BW-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512BW-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length15_gt_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512F-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512F-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length15_gt_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length15_gt_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-MIC-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-MIC-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length15_gt_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+; length16: memcmp over two 16-byte buffers whose i32 result is returned unchanged.
+; Each group of checks below expects the call to be replaced by two i64
+; load/bswap/compare stages (byte offsets 0 and 8): a mismatch branches to
+; res_block, which selects -1 or 1 from an unsigned compare of the swapped
+; values, and the endblock phi yields 0 when both stages compare equal.
+define i32 @length16(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length16(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length16(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length16(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length16(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length16(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length16(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length16(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length16(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length16(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length16(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; Input under test: a constant-length (16 byte) memcmp call whose result is
+; returned as-is, so the full three-way (negative / zero / positive) value matters.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+; length16_eq: memcmp over two 16-byte buffers whose result is only tested
+; against zero (icmp ne), so only equality matters. Each group of IR checks
+; below expects a single pair of i128 loads compared with icmp ne and that i1
+; returned directly; the zext to i32 is still expected in the output even
+; though the return does not use it.
+define i1 @length16_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length16_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-SSE41-LABEL: define i1 @length16_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; NOTE(review): the two groups that follow match x86 assembly (an asm-style
+; function label and vmovdqu/vpxor/vptest instructions) rather than IR, unlike
+; every group above. They look like leftovers from an llc-based version of
+; this test - confirm whether any RUN line still uses these two prefixes and
+; drop the groups if none does.
+; X64-AVX-LABEL: length16_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length16_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length16_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length16_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length16_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length16_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length16_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length16_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length16_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length16_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind {
+; Equality-only memcmp of 16 bytes against the global @.str. Every IR check prefix
+; below expects the call to be replaced by one unaligned i128 load of %X compared
+; against an i128 immediate (presumably the first 16 bytes of @.str - confirm
+; against the global's definition).
+;
+; X64-LABEL: define i1 @length16_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length16_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+; length24 returns the raw i32 memcmp result for 24 bytes; all prefixes expect three chained i64 load/bswap/compare blocks at byte offsets 0, 8 and 16.
+define i32 @length24(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length24(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length24(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length24(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length24(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length24(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length24(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length24(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length24(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length24(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length24(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+; memcmp(x, y, 24) whose result is only compared against zero.
+; Every check prefix below expects the same branch-free expansion: a pair of
+; i128 loads at offset 0 and a pair of i64 loads at offset 16 (zero-extended
+; to i128), xor'ed pairwise, or'ed together and tested against zero.
+define i1 @length24_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length24_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length24_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length24_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length24_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+; length24_gt: memcmp of 24 bytes whose result is tested with `> 0`.
+; Every check group below expects the call to be replaced by three 8-byte
+; load pairs (offsets 0, 8 and 16); each pair is byte-swapped and compared
+; for equality. The first mismatch branches to res_block, which selects -1
+; or 1 from an unsigned less-than of the swapped values; if all three pairs
+; match, the phi in endblock yields 0. The final signed greater-than test
+; against 0 is applied to that phi. The expected IR is the same in every
+; group apart from the attribute group referenced on the define line.
+define i1 @length24_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length24_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length24_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind {
+; Inequality-only memcmp of 24 bytes of %X against the constant @.str: the IR checks below expect one i128 load plus one zero-extended i64 load at offset 16, each xor'ed with a constant, or'ed together and compared against 0.
+; X64-LABEL: define i1 @length24_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-NEXT: ret i1 [[TMP8]]
+;
+; X64-SSE41-LABEL: define i1 @length24_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-SSE41-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-SSE41-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-SSE41-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]]
+; NOTE(review): the X64-AVX and X64-MIC-AVX blocks below match x86 assembly rather than IR, unlike every other prefix in this function; presumably leftovers from the former llc-based test - confirm whether any RUN line still uses these prefixes.
+; X64-AVX-LABEL: length24_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length24_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length31(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length31(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length31(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length31(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length31(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length31(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length31(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length31(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length31(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length31(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length31(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
+ ret i32 %m
+}
+
+define i1 @length31_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length31_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX-LABEL: length31_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length31_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length31_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length31_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+;
+; X64-LABEL: define i1 @length31_eq_prefer128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_eq_prefer128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_eq_prefer128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_prefer128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX-LABEL: length31_eq_prefer128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length31_eq_prefer128:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_const(ptr %X) nounwind {
+;
+; X64-LABEL: define i1 @length31_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-NEXT: ret i1 [[TMP7]]
+;
+; X64-SSE41-LABEL: define i1 @length31_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i1 @length31_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX2-LABEL: define i1 @length31_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP7]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP7]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX-LABEL: length31_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length31_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length32(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length32(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length32(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length32(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length32(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length32(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length32(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length32(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length32(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length32(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind {
+; Equality-only use of a 32-byte memcmp (result compared against 0); the checks expect two i128 xor/or pairs or a single i256 compare. NOTE(review): the assembly-style checks at the end of this list (vmovdqu, vpxor, kortestw, retq) look like leftovers from the old llc test - confirm their prefixes are unused and drop them.
+; X64-LABEL: define i1 @length32_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512-LABEL: length32_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length32_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length32_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length32_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+; memcmp of 32 bytes whose result is only tested for equality with zero, in a
+; function carrying the "prefer-vector-width"="128" attribute. Every IR check
+; group below expects the call to be expanded into two pairs of align-1 i128
+; loads (offsets 0 and 16) that are xor'ed, or'ed together and compared
+; against zero.
+define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+;
+; X64-LABEL: define i1 @length32_eq_prefer128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_eq_prefer128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_eq_prefer128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_prefer128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; NOTE(review): the remaining check lines in this function match x86 assembly
+; rather than IR, unlike the groups above; presumably leftovers from an
+; llc-based version of this test. Confirm that some run line of this file
+; still enables their prefixes, otherwise they are dead.
+; X64-AVX-LABEL: length32_eq_prefer128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length32_eq_prefer128:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; memcmp of 32 bytes between %X and the global @.str; the result is tested
+; with `icmp ne ..., 0` (despite the _eq in the function name, the function
+; returns true when the buffers differ).
+; Expected expansions in the IR check groups below:
+;   - the first two groups: two align-1 i128 loads (offsets 0 and 16), each
+;     xor'ed with a constant, or'ed together and compared against zero;
+;   - the remaining groups: a single align-1 i256 load compared against one
+;     constant.
+; In every group the zext to i32 is still matched, but the function returns
+; the i1 compare directly.
+define i1 @length32_eq_const(ptr %X) nounwind {
+;
+; X64-LABEL: define i1 @length32_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-NEXT: ret i1 [[TMP7]]
+;
+; X64-SSE41-LABEL: define i1 @length32_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; NOTE(review): the remaining check lines in this function match x86 assembly
+; rather than IR, unlike the groups above; presumably leftovers from an
+; llc-based version of this test. Confirm that some run line of this file
+; still enables their prefixes, otherwise they are dead.
+; X64-AVX512-LABEL: length32_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length32_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 48 bytes whose raw i32 result is returned (three-way compare, no
+; equality test). Every check group below expects the call to memcmp to be
+; left in place, i.e. no inline expansion for this size.
+define i32 @length48(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length48(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length48(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length48(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length48(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length48(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length48(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length48(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length48(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length48(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length48(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length48_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $48, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length48_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512-LABEL: length48_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length48_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length48_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length48_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X64-LABEL: define i1 @length48_eq_prefer128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_eq_prefer128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_eq_prefer128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_prefer128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; length48_eq_const does a 48-byte memcmp of %X against the global @.str and
+; only tests the result with `icmp ne ..., 0`.
+; The IR-style checks expect the call to be replaced by loads xor'ed with
+; constants - three i128 loads (offsets 0/16/32) for the X64 and X64-SSE41
+; prefixes, and an i256 load plus a zero-extended i128 load at offset 32 for
+; the AVX prefixes.
+; NOTE(review) - the X64-SSE, X64-AVX512 and X64-MIC-AVX blocks below are
+; assembly-style checks, unlike the IR checks used for the other prefixes. They
+; look like leftovers from an llc-based version of this test; confirm that a
+; RUN line still uses these prefixes, otherwise regenerate the checks.
+define i1 @length48_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length48_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $48, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length48_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0
+; X64-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-NEXT: ret i1 [[TMP11]]
+;
+; X64-SSE41-LABEL: define i1 @length48_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP11]]
+;
+; X64-AVX1-LABEL: define i1 @length48_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX1-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX2-LABEL: define i1 @length48_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512-LABEL: length48_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length48_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; length63 returns the raw i32 result of a 63-byte memcmp of %X and %Y.
+; Every prefix checked below expects the memcmp call to be left in place
+; rather than expanded into loads.
+define i32 @length63(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length63(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length63(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length63(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length63(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length63(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length63(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length63(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length63(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length63(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length63(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
+ ret i32 %m
+}
+
+; length63_eq does a 63-byte memcmp of %x and %y whose result is only tested
+; against zero. Despite the `_eq` suffix, the IR below uses `icmp ne`.
+; The IR-style checks expect overlapping load pairs - four i128 pairs at
+; offsets 0/16/32/47 for the X64 and X64-SSE41 prefixes, and two i256 pairs at
+; offsets 0/31 for the AVX prefixes.
+; NOTE(review) - the X64-SSE, X64-AVX512 and X64-MIC-AVX blocks below are
+; assembly-style checks, unlike the IR checks used for the other prefixes. They
+; look like leftovers from an llc-based version of this test; confirm that a
+; RUN line still uses these prefixes, otherwise regenerate the checks.
+define i1 @length63_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length63_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $63, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length63_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47
+; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-NEXT: ret i1 [[TMP22]]
+;
+; X64-SSE41-LABEL: define i1 @length63_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47
+; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX1-LABEL: define i1 @length63_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length63_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512-LABEL: length63_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length63_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2
+; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; length63_lt does a 63-byte memcmp of %x and %y and returns whether the result
+; is negative (`icmp slt ..., 0`).
+; Every prefix checked below expects the memcmp call and the signed compare to
+; be left in place rather than expanded into loads.
+define i1 @length63_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length63_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length63_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length63_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length63_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+; length63_gt does a 63-byte memcmp of %x and %y and returns whether the result
+; is positive (`icmp sgt ..., 0`).
+; Every prefix checked below expects the memcmp call and the signed compare to
+; be left in place rather than expanded into loads.
+define i1 @length63_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length63_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length63_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length63_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length63_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length63_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $63, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length63_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215
+; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length63_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215
+; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length63_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length63_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-AVX512-LABEL: length63_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length63_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length64(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length64(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length64(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length64(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length64(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length64(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length64(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length64(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length64(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length64(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length64_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $64, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length64_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-NEXT: ret i1 [[TMP22]]
+;
+; X64-SSE41-LABEL: define i1 @length64_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512-LABEL: length64_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
+; X64-AVX512-NEXT: kortestw %k0, %k0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length64_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length64_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length64_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length64_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length64_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length64_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length64_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $64, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length64_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736
+; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length64_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736
+; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-AVX512-LABEL: length64_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
+; X64-AVX512-NEXT: kortestw %k0, %k0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length96(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length96(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length96(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length96(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length96(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length96(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length96(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length96(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length96(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length96(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length96(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+; The raw three-way result of a 96-byte memcmp is returned. Every check prefix
+; above expects the call to remain in place rather than be expanded inline.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
+ ret i32 %m
+}
+
+define i1 @length96_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length96_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $96, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; NOTE(review) the X64-SSE checks above match assembly, not IR, unlike every other prefix here - presumably stale; confirm a RUN line still uses that prefix.
+; X64-LABEL: define i1 @length96_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length96_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length96_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX2-LABEL: define i1 @length96_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP16]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+; Tests memcmp(x, y, 96) != 0. The X64 and X64-SSE41 prefixes keep the call; the other IR prefixes expect i256/i512 loads merged by xor/or.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length96_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length96_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length96_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length96_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; Only the sign of the 96-byte memcmp result is used (icmp slt against 0).
+; Every check prefix above expects the call to be kept as-is.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length96_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length96_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length96_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length96_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; Tests whether the 96-byte memcmp result is strictly positive (icmp sgt 0).
+; Every check prefix above expects the call to be kept as-is.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length96_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $96, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length96_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length96_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length96_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length96_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; The raw i32 result of memcmp(X, Y, 127) is returned to the caller.
+; Every check prefix below expects the libcall to be left in place, i.e. no
+; inline expansion for this size when the full ordering result is needed.
+define i32 @length127(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length127(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length127(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length127(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length127(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length127(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length127(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length127(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length127(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length127(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length127(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
+ ret i32 %m
+}
+
+; The result of memcmp(x, y, 127) is only tested against zero ("icmp ne"),
+; so an equality-only expansion is allowed.
+; - The X64 and X64-SSE41 configurations expect the libcall to be kept.
+; - The configurations checked with i256 loads expect four loads per operand
+;   at byte offsets 0, 32, 64 and 95; the last one overlaps the previous load
+;   so that exactly 127 bytes are covered.
+; - The configurations checked with i512 loads expect two loads per operand
+;   at byte offsets 0 and 63, again overlapping to cover 127 bytes.
+; In the expanded forms the xor results are or-ed together and compared
+; against zero, and that i1 is returned directly.
+define i1 @length127_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length127_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length127_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length127_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX2-LABEL: define i1 @length127_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; The result of memcmp(x, y, 127) is tested with "icmp slt ..., 0", so the
+; sign of the ordering result is needed, not just equality.
+; Every check prefix below expects both the libcall and the compare to be
+; left unchanged.
+define i1 @length127_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length127_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length127_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length127_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length127_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+; The result of memcmp(x, y, 127) is tested with "icmp sgt ..., 0", so the
+; sign of the ordering result is needed, not just equality.
+; Every check prefix below expects both the libcall and the compare to be
+; left unchanged.
+define i1 @length127_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length127_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length127_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length127_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length127_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length127_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $127, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length127_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length127_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length127_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length127_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way memcmp of 128 bytes whose i32 result is returned to the caller.
+; Every checked configuration expects the call to be left untouched.
+define i32 @length128(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
+ ret i32 %m
+}
+
+; 128-byte memcmp whose result is only tested against zero (icmp ne).
+; The X64 and X64-SSE41 configurations keep the libcall. The 256-bit
+; configurations (AVX1, AVX2, AVX512BW-256, AVX512F-256, MIC-AVX2) expect four
+; i256 load pairs at offsets 0/32/64/96; the 512-bit ones (AVX512BW, AVX512F,
+; MIC-AVX512F) expect two i512 load pairs at offsets 0/64. In both shapes the
+; per-pair xor results are or-reduced and compared against zero once.
+define i1 @length128_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length128_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length128_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length128_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX2-LABEL: define i1 @length128_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; 128-byte memcmp whose result feeds a signed less-than-zero test, so the
+; sign of the three-way result is what is consumed. Every checked
+; configuration expects both the call and the icmp slt to be left untouched.
+define i1 @length128_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length128_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length128_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length128_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length128_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+; 128-byte memcmp whose result feeds a signed greater-than-zero test, so the
+; sign of the three-way result is what is consumed. Every checked
+; configuration expects both the call and the icmp sgt to be left untouched.
+define i1 @length128_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length128_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length128_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length128_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length128_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length128_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $128, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length128_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length128_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length128_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length128_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length192(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length192(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length192(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length192(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length192(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length192(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length192(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length192(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length192(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length192(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length192_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length192_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length192_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length192_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP16]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP16]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length192_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length192_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length192_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length192_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length192_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length192_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length192_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length192_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length192_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length192_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length192_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length192_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length255(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length255(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length255(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length255(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length255(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length255(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length255(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length255(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length255(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length255(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length255_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length255_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length255_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length255_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length255_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length255_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length255_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length255_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Input IR - signed "less than zero" test on the result of a 255-byte memcmp.
+; Every check prefix listed for this function expects the memcmp call and the
+; icmp slt to be left as written, i.e. no inline expansion of this ordering
+; compare at this size.
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length255_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length255_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length255_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length255_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Input IR - signed "greater than zero" test on the result of a 255-byte
+; memcmp. Every check prefix listed for this function expects the memcmp call
+; and the icmp sgt to be left as written, i.e. no inline expansion of this
+; ordering compare at this size.
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length255_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length255_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length255_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length255_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; Input IR - equality test of a 255-byte memcmp of %X against the global @.str.
+; X64-AVX512BW, X64-AVX512F and X64-MIC-AVX512F expect the call to be replaced
+; by four i512 load pairs at byte offsets 0, 64, 128 and 191 (the last load
+; overlaps the previous one by one byte), XORed pairwise and ORed together.
+; All other prefixes listed for this function expect the call to remain.
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length256(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length256(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length256(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length256(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length256(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length256(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length256(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length256(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length256(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length256(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+; Input IR - returns the raw i32 result of a 256-byte memcmp (three-way
+; result, not just an equality test). Every check prefix listed for this
+; function expects the memcmp call to be left unexpanded.
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length256_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length256_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length256_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length256_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+; Input IR - despite the _eq suffix, the predicate tested here is "ne 0" on a
+; 256-byte memcmp. X64-AVX512BW, X64-AVX512F and X64-MIC-AVX512F expect four
+; i512 load pairs at byte offsets 0, 64, 128 and 192, XORed pairwise, ORed
+; together and compared against zero; the zext in those expectations is not
+; used by the ret. All other prefixes here expect the call to remain.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length256_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length256_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length256_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length256_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Input IR - signed "less than zero" test on the result of a 256-byte memcmp.
+; Every check prefix listed for this function expects the memcmp call and the
+; icmp slt to be left as written, i.e. no inline expansion of this ordering
+; compare at this size.
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length256_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length256_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length256_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length256_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length256_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length256_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length256_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length256_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length384(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length384(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length384(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length384(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length384(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length384(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length384(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length384(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length384(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length384(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length384(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind
+ ret i32 %m
+}
+
+define i1 @length384_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length384_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length384_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length384_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length384_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length384_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length384_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length384_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length384_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length384_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length384_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length384_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length384_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length384_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length384_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length384_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length384_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length511(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length511(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length511(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length511(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length511(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length511(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length511(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length511(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length511(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length511(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length511(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind
+ ret i32 %m
+}
+
+define i1 @length511_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length511_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length511_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length511_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length511_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length511_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length511_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length511_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length511_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length511_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length511_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length511_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length511_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length511_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length511_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length511_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length511_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length512(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length512(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length512(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length512(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length512(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length512(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length512(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length512(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length512(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length512(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length512(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind
+ ret i32 %m
+}
+
+define i1 @length512_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length512_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length512_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length512_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length512_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length512_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length512_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length512_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length512_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length512_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length512_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length512_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length512_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length512_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length512_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length512_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length512_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks that we do not do stupid things with huge sizes.
+define i32 @huge_length(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @huge_length(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @huge_length(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @huge_length(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @huge_length(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @huge_length(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @huge_length(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @huge_length(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @huge_length(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @huge_length(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @huge_length(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
+ ret i32 %m
+}
+
+define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @huge_length_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @huge_length_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @huge_length_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @huge_length_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @huge_length_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @huge_length_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @huge_length_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @huge_length_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @huge_length_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @huge_length_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks non-constant sizes.
+define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind {
+; X64-LABEL: define i32 @nonconst_length(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @nonconst_length(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @nonconst_length(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @nonconst_length(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @nonconst_length(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @nonconst_length(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @nonconst_length(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @nonconst_length(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @nonconst_length(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @nonconst_length(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
+ ret i32 %m
+}
+
+define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind {
+; X64-LABEL: define i1 @nonconst_length_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @nonconst_length_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @nonconst_length_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @nonconst_length_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
diff --git a/llvm/test/CodeGen/X86/memcmp-constant.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-constant.ll
similarity index 50%
rename from llvm/test/CodeGen/X86/memcmp-constant.ll
rename to llvm/test/Transforms/ExpandMemCmp/X86/memcmp-constant.ll
index 2059b8f8040827..908c6b34183e57 100644
--- a/llvm/test/CodeGen/X86/memcmp-constant.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-constant.ll
@@ -1,5 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
+; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD
+
@.str1 = private constant [4 x i8] c"\00\00\00\00", align 1
@.str2 = private constant [4 x i8] c"\ff\ff\ff\ff", align 1
@@ -7,49 +9,49 @@
declare i32 @memcmp(ptr, ptr, i64)
define i32 @length4_same() nounwind {
-; CHECK-LABEL: length4_same:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: retq
+; X64-LABEL: define i32 @length4_same(
+; X64-SAME: ) #[[ATTR0:[0-9]+]] {
+; X64-NEXT: ret i32 0
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind
ret i32 %m
}
define i1 @length4_same_lt() nounwind {
-; CHECK-LABEL: length4_same_lt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_same_lt(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 false
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind
%c = icmp slt i32 %m, 0
ret i1 %c
}
define i1 @length4_same_gt() nounwind {
-; CHECK-LABEL: length4_same_gt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_same_gt(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 false
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind
%c = icmp sgt i32 %m, 0
ret i1 %c
}
define i1 @length4_same_le() nounwind {
-; CHECK-LABEL: length4_same_le:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_same_le(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 true
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind
%c = icmp sle i32 %m, 0
ret i1 %c
}
define i1 @length4_same_ge() nounwind {
-; CHECK-LABEL: length4_same_ge:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_same_ge(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 true
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind
%c = icmp sge i32 %m, 0
ret i1 %c
@@ -57,52 +59,55 @@ define i1 @length4_same_ge() nounwind {
define i32 @length4() nounwind {
-; CHECK-LABEL: length4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: retq
+; X64-LABEL: define i32 @length4(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i32 -1
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind
ret i32 %m
}
define i1 @length4_lt() nounwind {
-; CHECK-LABEL: length4_lt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_lt(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 true
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind
%c = icmp slt i32 %m, 0
ret i1 %c
}
define i1 @length4_gt() nounwind {
-; CHECK-LABEL: length4_gt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_gt(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 false
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind
%c = icmp sgt i32 %m, 0
ret i1 %c
}
define i1 @length4_le() nounwind {
-; CHECK-LABEL: length4_le:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_le(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 true
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind
%c = icmp sle i32 %m, 0
ret i1 %c
}
define i1 @length4_ge() nounwind {
-; CHECK-LABEL: length4_ge:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: retq
+; X64-LABEL: define i1 @length4_ge(
+; X64-SAME: ) #[[ATTR0]] {
+; X64-NEXT: ret i1 false
+;
%m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind
%c = icmp sge i32 %m, 0
ret i1 %c
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64_1LD: {{.*}}
+; X64_2LD: {{.*}}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll
new file mode 100644
index 00000000000000..edd70ddb445dcc
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll
@@ -0,0 +1,493 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2
+
+; This tests the expand-memcmp pass's inlining/optimization of memcmp in minsize functions
+; rdar://6480398
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i32)
+
+define i32 @length2(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2:[0-9]+]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2:[0-9]+]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length2_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind minsize {
+; X86-LABEL: define i1 @length2_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length3(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length3(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length3_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length3_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length4(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length4(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length4_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind minsize {
+; X86-LABEL: define i1 @length4_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length5(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length5(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length5_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length5_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length8(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length8(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length8_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind minsize {
+; X86-LABEL: define i1 @length8_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 8) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 8) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i1 @length12_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length12_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length12(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length12(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length16(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length16(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize {
+;
+; X86-LABEL: define i1 @length16_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind minsize {
+;
+; X86-LABEL: define i1 @length16_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length24(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length24(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize {
+; X86-LABEL: define i1 @length24_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind minsize {
+; X86-LABEL: define i1 @length24_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length32(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length32(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize {
+; X86-LABEL: define i1 @length32_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind minsize {
+; X86-LABEL: define i1 @length32_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind minsize {
+; X86-LABEL: define i32 @length64(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length64(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize {
+; X86-LABEL: define i1 @length64_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind minsize {
+; X86-LABEL: define i1 @length64_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR2]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR2]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll
new file mode 100644
index 00000000000000..431dc158962996
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll
@@ -0,0 +1,707 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=X64-AVX1
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefix=X64-AVX2
+
+; This tests IR-level inlining/optimization of memcmp by the expand-memcmp pass
+; rdar://6480398
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i64)
+
+define i32 @length2(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR1:[0-9]+]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length2_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind minsize {
+; X64-LABEL: define i1 @length2_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length3(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length3(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length3(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length3_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length3_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length3_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length4(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length4(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length4(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length4_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind minsize {
+; X64-LABEL: define i1 @length4_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length5(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length5(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length5(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length5_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length5_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length5_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length8(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length8(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length8(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length8_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind minsize {
+; X64-LABEL: define i1 @length8_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 8) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 8) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 8) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i1 @length12_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length12_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length12_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length12(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length12(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length12(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(ptr %X, ptr %Y) nounwind minsize {
+;
+; X64-LABEL: define i32 @length16(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length16(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length16(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize {
+; X64-SSE2-LABEL: length16_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX-LABEL: length16_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-LABEL: define i1 @length16_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR1]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind minsize {
+; X64-SSE2-LABEL: length16_eq_const:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX-LABEL: length16_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-LABEL: define i1 @length16_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 16) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 16) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 16) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length24(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length24(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length24(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize {
+; X64-LABEL: define i1 @length24_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR1]]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind minsize {
+; X64-LABEL: define i1 @length24_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 24) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 24) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 24) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length32(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length32(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length32(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize {
+; X64-SSE2-LABEL: length32_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: pushq $32
+; X64-SSE2-NEXT: popq %rdx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-LABEL: define i1 @length32_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR1]]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind minsize {
+; X64-SSE2-LABEL: length32_eq_const:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: pushq $32
+; X64-SSE2-NEXT: popq %rdx
+; X64-SSE2-NEXT: movl $.L.str, %esi
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-LABEL: define i1 @length32_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 32) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 32) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 32) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind minsize {
+; X64-LABEL: define i32 @length64(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR1]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length64(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length64(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize {
+; X64-LABEL: define i1 @length64_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR1]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind minsize {
+; X64-LABEL: define i1 @length64_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR1]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR2]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR2]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll
new file mode 100644
index 00000000000000..abdadb14086c20
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll
@@ -0,0 +1,6203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way.
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefixes=X86
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse < %s | FileCheck %s --check-prefixes=X86-SSE1
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X86-SSE2
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X86-SSE41
+
+; This tests the IR-level expansion of memcmp calls performed by the expand-memcmp pass.
+; rdar://6480398
+
+ at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i32)
+
+define i32 @length0(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length0(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-NEXT: ret i32 0
+;
+; X86-SSE1-LABEL: define i32 @length0(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE1-NEXT: ret i32 0
+;
+; X86-SSE2-LABEL: define i32 @length0(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE2-NEXT: ret i32 0
+;
+; X86-SSE41-LABEL: define i32 @length0(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE41-NEXT: ret i32 0
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
+ ret i32 %m
+ }
+
+define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length0_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: ret i1 true
+;
+; X86-SSE1-LABEL: define i1 @length0_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: ret i1 true
+;
+; X86-SSE2-LABEL: define i1 @length0_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: ret i1 true
+;
+; X86-SSE41-LABEL: define i1 @length0_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: ret i1 true
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length0_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: ret i1 false
+;
+; X86-SSE1-LABEL: define i1 @length0_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: ret i1 false
+;
+; X86-SSE2-LABEL: define i1 @length0_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: ret i1 false
+;
+; X86-SSE41-LABEL: define i1 @length0_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: ret i1 false
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length2(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE1-LABEL: define i32 @length2(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i32 @length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i32 @length2(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length2_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE1-LABEL: define i1 @length2_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE41-LABEL: define i1 @length2_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length3(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length3(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br label [[ENDBLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length3(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length3(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br label [[ENDBLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length3_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE1-LABEL: define i1 @length3_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length3_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE41-LABEL: define i1 @length3_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length4(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE1-LABEL: define i32 @length4(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE1-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE2-LABEL: define i32 @length4(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE2-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE41-LABEL: define i32 @length4(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE41-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length4_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE1-LABEL: define i1 @length4_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE41-LABEL: define i1 @length4_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length4_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE1-LABEL: define i1 @length4_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE2-LABEL: define i1 @length4_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE41-LABEL: define i1 @length4_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length4_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE1-LABEL: define i1 @length4_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE2-LABEL: define i1 @length4_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE41-LABEL: define i1 @length4_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length4_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length4_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length4_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length5(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length5(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br label [[ENDBLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length5(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length5(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br label [[ENDBLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length5_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE1-LABEL: define i1 @length5_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length5_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE41-LABEL: define i1 @length5_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length5_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length5_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br label [[ENDBLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length5_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length5_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br label [[ENDBLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length7(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length7(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length7(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length7(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length7(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
+ ret i32 %m
+}
+
+define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length7_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-NEXT: ret i1 [[TMP10]]
+;
+; X86-SSE1-LABEL: define i1 @length7_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP10]]
+;
+; X86-SSE2-LABEL: define i1 @length7_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP10]]
+;
+; X86-SSE41-LABEL: define i1 @length7_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP10]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length7_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length7_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length7_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length7_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length8(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length8(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length8(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length8(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length8_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length8_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length8_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length8_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE1-LABEL: define i1 @length8_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE1-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE1-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length8_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length9_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; X86-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length9_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; X86-SSE1-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-SSE1-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-SSE1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length9_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; X86-SSE2-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-SSE2-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-SSE2-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length9_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; X86-SSE41-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-SSE41-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-SSE41-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length10_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32
+; X86-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length10_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32
+; X86-SSE1-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-SSE1-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-SSE1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length10_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32
+; X86-SSE2-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-SSE2-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-SSE2-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length10_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32
+; X86-SSE41-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; X86-SSE41-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]]
+; X86-SSE41-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length11_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length11_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-SSE1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-SSE1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length11_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length11_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length12_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-NEXT: ret i1 [[TMP16]]
+;
+; X86-SSE1-LABEL: define i1 @length12_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-SSE1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-SSE1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP16]]
+;
+; X86-SSE2-LABEL: define i1 @length12_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP16]]
+;
+; X86-SSE41-LABEL: define i1 @length12_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]]
+; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP16]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length12(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86: loadbb2:
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length12(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb2:
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length12(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb2:
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length12(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb2:
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ ret i32 %m
+}
+
+define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length13_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1
+; X86-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32
+; X86-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32
+; X86-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length13_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE1-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32
+; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32
+; X86-SSE1-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-SSE1-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-SSE1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-SSE1-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length13_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE2-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32
+; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32
+; X86-SSE2-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-SSE2-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-SSE2-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-SSE2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-SSE2-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length13_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE41-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32
+; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32
+; X86-SSE41-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-SSE41-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-SSE41-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-SSE41-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-SSE41-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length14_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1
+; X86-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32
+; X86-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32
+; X86-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length14_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32
+; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32
+; X86-SSE1-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-SSE1-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-SSE1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-SSE1-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length14_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32
+; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32
+; X86-SSE2-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-SSE2-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-SSE2-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-SSE2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-SSE2-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length14_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE41-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32
+; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32
+; X86-SSE41-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; X86-SSE41-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]]
+; X86-SSE41-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; X86-SSE41-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; X86-SSE41-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Equality-only use of a 15-byte memcmp: the i32 result is only compared with 0.
+; Every check prefix below expects the same inline expansion: four i32 load
+; pairs at byte offsets 0, 4, 8 and 11 (the last pair overlaps the previous one
+; by a byte), each XORed together, OR-reduced, and tested against zero.
+define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length15_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11
+; X86-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; X86-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; X86-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; X86-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length15_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11
+; X86-SSE1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; X86-SSE1-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; X86-SSE1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; X86-SSE1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length15_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11
+; X86-SSE2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; X86-SSE2-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; X86-SSE2-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; X86-SSE2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length15_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11
+; X86-SSE41-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; X86-SSE41-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; X86-SSE41-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; X86-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+; Ordering use of a 16-byte memcmp: the i32 result is returned unchanged.
+; Every check prefix below expects a chain of four load blocks (offsets 0, 4, 8
+; and 12). Each block byte-swaps both i32 words and branches to res_block on
+; the first mismatch, where an unsigned less-than picks -1 or 1; endblock
+; yields 0 when all four words compare equal.
+define i32 @length16(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length16(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86: loadbb2:
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86: loadbb3:
+; X86-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length16(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb2:
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb3:
+; X86-SSE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE1-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE1-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE1-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length16(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb2:
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb3:
+; X86-SSE2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE2-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE2-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE2-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length16(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb2:
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb3:
+; X86-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE41-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE41-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE41-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE41-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
+ ret i32 %m
+}
+
+; Inequality-only use of a 16-byte memcmp: the i32 result is only compared
+; against 0 with icmp ne.
+; The X86 and X86-SSE1 prefixes expect four i32 load pairs (offsets 0, 4, 8 and
+; 12) XORed and OR-reduced; the X86-SSE2 and X86-SSE41 prefixes expect a single
+; i128 load pair compared directly. In both shapes the i1 compare is returned
+; as-is and the generated zext is left without a user.
+define i1 @length16_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length16_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; X86-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; X86-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; X86-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-NEXT: ret i1 [[TMP22]]
+;
+; X86-SSE1-LABEL: define i1 @length16_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1
+; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; X86-SSE1-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; X86-SSE1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; X86-SSE1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP22]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE41-LABEL: define i1 @length16_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length16_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86: loadbb2:
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86: loadbb3:
+; X86-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length16_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb2:
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb3:
+; X86-SSE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE1-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE1-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE1-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb2:
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb3:
+; X86-SSE2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE2-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE2-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE2-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length16_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb2:
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb3:
+; X86-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE41-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE41-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE41-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE41-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length16_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86: loadbb2:
+; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86: loadbb3:
+; X86-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length16_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb2:
+; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE1: loadbb3:
+; X86-SSE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE1-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE1-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE1-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb2:
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE2: loadbb3:
+; X86-SSE2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE2-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE2-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE2-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length16_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb2:
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1
+; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]])
+; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X86-SSE41: loadbb3:
+; X86-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12
+; X86-SSE41-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1
+; X86-SSE41-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1
+; X86-SSE41-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]])
+; X86-SSE41-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]])
+; X86-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X86-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length16_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl $858927408, %ecx # imm = 0x33323130
+; X86-NOSSE-NEXT: xorl (%eax), %ecx
+; X86-NOSSE-NEXT: movl $926299444, %edx # imm = 0x37363534
+; X86-NOSSE-NEXT: xorl 4(%eax), %edx
+; X86-NOSSE-NEXT: orl %ecx, %edx
+; X86-NOSSE-NEXT: movl $825243960, %ecx # imm = 0x31303938
+; X86-NOSSE-NEXT: xorl 8(%eax), %ecx
+; X86-NOSSE-NEXT: movl $892613426, %esi # imm = 0x35343332
+; X86-NOSSE-NEXT: xorl 12(%eax), %esi
+; X86-NOSSE-NEXT: orl %ecx, %esi
+; X86-NOSSE-NEXT: orl %edx, %esi
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: popl %esi
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length16_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 825243960
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP11:%.*]] = xor i32 [[TMP10]], 892613426
+; X86-NEXT: [[TMP12:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-NEXT: [[TMP13:%.*]] = or i32 [[TMP8]], [[TMP11]]
+; X86-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+; X86-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length16_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE1-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 825243960
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 12
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = xor i32 [[TMP10]], 892613426
+; X86-SSE1-NEXT: [[TMP12:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE1-NEXT: [[TMP13:%.*]] = or i32 [[TMP8]], [[TMP11]]
+; X86-SSE1-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+; X86-SSE1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length16_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length24(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length24(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length24(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length24(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length24_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $24
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length24_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length24_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length24_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length24_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length24_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length24_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length24_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length24_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length24_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length24_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $24
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length24_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length24_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length24_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length31(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length31(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length31(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length31(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length31(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind
+ ret i32 %m
+}
+
+define i1 @length31_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length31_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $31
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length31_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length31_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length31_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length31_eq_prefer128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $31
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length31_eq_prefer128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_eq_prefer128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_eq_prefer128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_eq_prefer128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length31_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $31
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length31_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length31_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length31_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length31_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length32(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length32(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length32(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length32(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length32_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length32_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length32_eq_prefer128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq_prefer128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_eq_prefer128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_prefer128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_eq_prefer128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length32_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length32_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length48(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length48(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length48(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length48(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length48(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length48_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $48
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length48_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length48_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length48_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length48_eq_prefer128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $48
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length48_eq_prefer128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_eq_prefer128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_eq_prefer128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_eq_prefer128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length48_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $48
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length48_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length48_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length48_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP11]]
+;
+; X86-SSE41-LABEL: define i1 @length48_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length63(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length63(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length63(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length63(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length63(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind
+ ret i32 %m
+}
+
+define i1 @length63_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length63_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $63
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length63_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length63_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length63_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47
+; X86-SSE2-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X86-SSE2-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X86-SSE2-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X86-SSE2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP22]]
+;
+; X86-SSE41-LABEL: define i1 @length63_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47
+; X86-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X86-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X86-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X86-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP22]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length63_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length63_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length63_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length63_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length63_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length63_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length63_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length63_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length63_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $63
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length63_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length63_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length63_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215
+; X86-SSE2-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X86-SSE2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length63_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215
+; X86-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X86-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length64(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length64(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length64(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length64(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length64_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $64
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length64_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length64_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; X86-SSE2-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X86-SSE2-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X86-SSE2-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X86-SSE2-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X86-SSE2-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X86-SSE2-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X86-SSE2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP22]]
+;
+; X86-SSE41-LABEL: define i1 @length64_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; X86-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X86-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X86-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X86-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X86-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X86-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X86-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP22]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length64_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length64_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length64_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length64_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length64_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length64_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length64_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $64
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length64_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length64_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736
+; X86-SSE2-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X86-SSE2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length64_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736
+; X86-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X86-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length96(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length96(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length96(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length96(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length96(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind
+ ret i32 %m
+}
+
+define i1 @length96_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length96_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length96_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length96_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length96_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length96_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length96_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length96_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length96_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length96_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length96_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length96_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length96_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length96_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length96_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length96_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length96_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length127(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length127(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length127(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length127(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length127(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind
+ ret i32 %m
+}
+
+define i1 @length127_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length127_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length127_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length127_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length127_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length127_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length127_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length127_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length127_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length127_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length127_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length127_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length127_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length127_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length127_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length127_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length127_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length128(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind
+ ret i32 %m
+}
+
+define i1 @length128_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length128_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length128_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length128_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length128_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length128_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length128_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length128_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length128_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length128_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length128_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length128_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length128_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length128_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length128_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length128_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length128_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length192(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length192(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length192(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length192(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length192_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length192_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length192_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length192_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length192_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length192_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length192_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length192_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length192_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length192_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length192_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length192_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length192_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length192_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length192_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length192_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length255(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length255(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length255(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length255(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length255_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length255_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length255_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length255_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length255_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length255_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length255_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length255_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length255_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length255_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length255_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length255_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length255_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length255_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length255_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length255_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length256(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length256(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length256(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length256(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length256_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length256_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length256_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length256_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length256_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length256_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length256_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length256_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length256_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length256_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length256_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length256_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length256_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length256_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length256_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length256_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length384(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length384(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length384(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length384(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length384(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind
+ ret i32 %m
+}
+
+define i1 @length384_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length384_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length384_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length384_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length384_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length384_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length384_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length384_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length384_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length384_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length384_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length384_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length384_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length384_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length384_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length384_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length384_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length511(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length511(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length511(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length511(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length511(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind
+ ret i32 %m
+}
+
+define i1 @length511_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length511_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length511_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length511_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length511_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length511_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length511_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length511_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length511_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length511_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length511_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length511_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length511_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length511_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length511_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length511_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length511_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length512(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length512(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length512(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length512(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length512(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind
+ ret i32 %m
+}
+
+define i1 @length512_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length512_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length512_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length512_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length512_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length512_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length512_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length512_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length512_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length512_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length512_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length512_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length512_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length512_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length512_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length512_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length512_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks that a huge constant size (the literal below wraps to i32 -1) is left as a plain memcmp call.
+define i32 @huge_length(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @huge_length(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @huge_length(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @huge_length(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @huge_length(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
+ ret i32 %m
+}
+
+define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @huge_length_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @huge_length_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @huge_length_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @huge_length_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks non-constant sizes.
+define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind {
+; X86-LABEL: define i32 @nonconst_length(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @nonconst_length(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @nonconst_length(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @nonconst_length(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
+ ret i32 %m
+}
+
+define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind {
+; X86-LABEL: define i1 @nonconst_length_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @nonconst_length_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @nonconst_length_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @nonconst_length_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll
new file mode 100644
index 00000000000000..56489a08800b76
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll
@@ -0,0 +1,18833 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way.
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=X64
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=sse4.1 < %s | FileCheck %s --check-prefixes=X64-SSE41
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefixes=X64-AVX1
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefixes=X64-AVX2
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW-256
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F-256
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX2
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX512F
+
+; This tests the expand-memcmp pass's inline expansion/optimization of memcmp
+; rdar://6480398
+
+ at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i64)
+
+define i32 @length0(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length0(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; X64-NEXT: ret i32 0
+;
+; X64-SSE41-LABEL: define i32 @length0(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-SSE41-NEXT: ret i32 0
+;
+; X64-AVX1-LABEL: define i32 @length0(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX1-NEXT: ret i32 0
+;
+; X64-AVX2-LABEL: define i32 @length0(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX2-NEXT: ret i32 0
+;
+; X64-AVX512BW-256-LABEL: define i32 @length0(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512BW-256-NEXT: ret i32 0
+;
+; X64-AVX512BW-LABEL: define i32 @length0(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512BW-NEXT: ret i32 0
+;
+; X64-AVX512F-256-LABEL: define i32 @length0(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512F-256-NEXT: ret i32 0
+;
+; X64-AVX512F-LABEL: define i32 @length0(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX512F-NEXT: ret i32 0
+;
+; X64-MIC-AVX2-LABEL: define i32 @length0(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-MIC-AVX2-NEXT: ret i32 0
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length0(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-MIC-AVX512F-NEXT: ret i32 0
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ ret i32 %m
+ }
+
+define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length0_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: ret i1 true
+;
+; X64-SSE41-LABEL: define i1 @length0_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: ret i1 true
+;
+; X64-AVX1-LABEL: define i1 @length0_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: ret i1 true
+;
+; X64-AVX2-LABEL: define i1 @length0_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: ret i1 true
+;
+; X64-AVX512BW-256-LABEL: define i1 @length0_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: ret i1 true
+;
+; X64-AVX512BW-LABEL: define i1 @length0_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: ret i1 true
+;
+; X64-AVX512F-256-LABEL: define i1 @length0_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: ret i1 true
+;
+; X64-AVX512F-LABEL: define i1 @length0_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: ret i1 true
+;
+; X64-MIC-AVX2-LABEL: define i1 @length0_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: ret i1 true
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length0_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: ret i1 true
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length0_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: ret i1 false
+;
+; X64-SSE41-LABEL: define i1 @length0_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: ret i1 false
+;
+; X64-AVX1-LABEL: define i1 @length0_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: ret i1 false
+;
+; X64-AVX2-LABEL: define i1 @length0_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: ret i1 false
+;
+; X64-AVX512BW-256-LABEL: define i1 @length0_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: ret i1 false
+;
+; X64-AVX512BW-LABEL: define i1 @length0_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: ret i1 false
+;
+; X64-AVX512F-256-LABEL: define i1 @length0_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: ret i1 false
+;
+; X64-AVX512F-LABEL: define i1 @length0_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: ret i1 false
+;
+; X64-MIC-AVX2-LABEL: define i1 @length0_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: ret i1 false
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length0_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: ret i1 false
+;
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length2(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
+;
+; X64-SSE41-LABEL: define i32 @length2(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i32 @length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX2-LABEL: define i32 @length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length2(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512BW-LABEL: define i32 @length2(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length2(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX512F-LABEL: define i32 @length2(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: ret i32 [[TMP7]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length2(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP7]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length2(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP7]]
+;
+
+
+
+
+
+
+
+ ; Returns the raw i32 result of a constant-size 2-byte memcmp. Every check
+ ; prefix above expects the call to be replaced by two align-1 i16 loads,
+ ; a bswap of each, zext to i32 and a subtract, with no call left behind.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+ ; Only equality with zero is tested on the 2-byte memcmp result. The checks
+ ; above expect two i16 loads compared with icmp ne and zero-extended to
+ ; i32 (no bswap), feeding the original icmp eq.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+ ; The 2-byte memcmp result is tested for being negative. The checks above
+ ; expect the i16 load / bswap / zext / sub expansion, with the icmp slt
+ ; applied to the subtraction result.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+ ; The 2-byte memcmp result is tested for being positive. The checks above
+ ; expect the i16 load / bswap / zext / sub expansion, with the icmp sgt
+ ; applied to the subtraction result.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length2_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-SSE41-LABEL: define i1 @length2_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+
+
+ ; Compares 2 bytes at %X against a constant operand (@.str + 1). Despite the
+ ; "_eq" name, the result is tested with icmp ne. The checks above expect one
+ ; i16 load compared against 12849 (0x3231) and that icmp ne returned directly.
+ ; NOTE(review): 12849 presumably encodes the two @.str bytes read; @.str is not visible here.
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+ ; The call site carries nobuiltin; the checks above expect the memcmp call
+ ; to be kept as a call rather than expanded into loads.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length3(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length3(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br label [[ENDBLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length3(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length3(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length3(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length3(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length3(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length3(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length3(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length3(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; NOTE(review): the four bare "X64-SSE2" block-label checks below have no
+; -LABEL/-SAME/-NEXT companions in this function; they look like leftovers from
+; an earlier check layout. Confirm a RUN line still uses that prefix.
+; X64-SSE2: res_block:
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+; Returns the raw three-way result of a 3-byte memcmp. Each full check group above
+; expects a byte-swapped i16 compare, then a zext/sub of the bytes at offset 2.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length3_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-SSE41-LABEL: define i1 @length3_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length3_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length3_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length3_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-LABEL: define i1 @length3_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length3_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-LABEL: define i1 @length3_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length3_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length3_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3-byte memcmp whose result is only tested against zero (with icmp ne, despite
+; the _eq suffix). Each check group above expects a branch-free expansion: xor of
+; the i16 loads, or'd with the xor of the zero-extended bytes at offset 2, != 0.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length4(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-SSE41-LABEL: define i32 @length4(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-SSE41-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length4(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length4(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length4(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-LABEL: define i32 @length4(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length4(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-LABEL: define i32 @length4(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length4(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length4(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+
+
+
+
+
+
+
+
+
+
+; Returns the raw three-way result of a 4-byte memcmp. Each check group above
+; expects a single block: bswap both i32 loads, then zext(ugt) minus zext(ult).
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length4_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-SSE41-LABEL: define i1 @length4_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length4_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: ret i1 [[TMP5]]
+;
+; X64-SSE41-LABEL: define i1 @length4_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX1-LABEL: define i1 @length4_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX2-LABEL: define i1 @length4_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length4_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: ret i1 [[TMP5]]
+;
+; X64-SSE41-LABEL: define i1 @length4_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX1-LABEL: define i1 @length4_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX2-LABEL: define i1 @length4_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: ret i1 [[TMP5]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length4_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length4_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length4_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length4_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length4_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length4_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length4_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length5(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length5(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br label [[ENDBLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length5(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length5(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length5(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length5(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length5(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length5(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length5(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length5(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length5_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-SSE41-LABEL: define i1 @length5_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length5_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length5_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length5_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-LABEL: define i1 @length5_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length5_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-LABEL: define i1 @length5_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length5_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length5_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length5_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length5_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br label [[ENDBLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length5_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length5_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length5_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length5_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length5_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length5_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length5_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length5_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length7(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length7(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length7(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length7(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length7(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length7(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length7(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length7(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length7(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length7(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length7(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ ret i32 %m
+}
+
+define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length7_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: ret i1 [[TMP10]]
+;
+; X64-SSE41-LABEL: define i1 @length7_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX1-LABEL: define i1 @length7_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length7_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length7_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-LABEL: define i1 @length7_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length7_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-LABEL: define i1 @length7_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length7_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length7_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+
+
+
+
+
+
+; Test input for a 7-byte memcmp whose result is only compared against zero.
+; Despite the "_eq" suffix, %c below is an `icmp ne`, so the function returns
+; true when the memcmp result is non-zero.
+; Every check group above expects the same inline expansion - two i32 loads
+; per pointer, at byte offsets 0 and 3 (the two windows overlap by one byte
+; so that exactly seven bytes are covered), xor'ed pairwise, or'ed together
+; and tested against zero. No memcmp call remains in the checked output.
+; The `zext` captured as TMP11 is not used by the expected `ret`.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length7_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length7_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length7_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length7_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length7_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length7_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length7_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length7_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length7_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length7_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length8(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-SSE41-LABEL: define i32 @length8(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-SSE41-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-SSE41-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length8(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length8(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length8(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512BW-LABEL: define i32 @length8(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512BW-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length8(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-256-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX512F-LABEL: define i32 @length8(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length8(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length8(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length8_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length8_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length8_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length8_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length8_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length8_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length8_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length8_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length8_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-SSE41-LABEL: define i1 @length8_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length8_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-LABEL: define i1 @length8_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length8_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-LABEL: define i1 @length8_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length8_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length9_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length9_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length9_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length9_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length9_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length9_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length9_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length9_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length9_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length9_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length10_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length10_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length10_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length10_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length10_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length10_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length10_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length10_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length10_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length10_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length11_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length11_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length11_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length11_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length11_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length11_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length11_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length11_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length11_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length11_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length12_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-SSE41-LABEL: define i1 @length12_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length12_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length12_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length12_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512BW-LABEL: define i1 @length12_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length12_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-LABEL: define i1 @length12_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length12_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length12_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length12(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length12(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-SSE41-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-SSE41-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length12(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length12(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length12(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length12(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512BW-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512BW-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length12(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512F-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length12(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length12(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length12(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; length12 - three-way memcmp of 12 bytes whose i32 result is returned unchanged.
+; The assertions above expect an i64 load pair at offset 0 and an i32 load pair at
+; offset 8, each byte-swapped; res_block yields -1 or 1 by unsigned compare, else 0.
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+; NOTE(review): the bare X64-SSE2 block labels above have no -LABEL/-NEXT lines in this
+; function; presumably leftovers of an earlier regeneration - confirm the prefix is still used.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length13_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length13_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length13_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length13_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length13_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length13_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length13_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length13_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length13_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length13_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+; length13_eq - equality-only memcmp of 13 bytes; the i1 result is (memcmp == 0).
+; The assertions above expect two overlapping i64 load pairs (offsets 0 and 5) that are
+; xor'ed, or'ed together and tested against zero, with no remaining call to memcmp.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length14_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length14_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length14_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length14_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length14_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length14_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length14_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length14_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length14_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length14_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+; memcmp of 14 bytes whose result is only tested for equality with zero. Every
+; prefix above expects two overlapping i64 load pairs (offsets 0 and 6), no call.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @length15_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length15_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length15_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length15_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length15_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length15_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length15_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length15_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length15_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length15_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+
+
+
+
+
+
+
+
+
+
+; memcmp of 15 bytes whose result is only tested for equality with zero. Every
+; prefix above expects two overlapping i64 load pairs (offsets 0 and 7), no call.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length16(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length16(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length16(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length16(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length16(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length16(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length16(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length16(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length16(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length16(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind {
+; memcmp(x, y, 16) whose result is only compared with icmp ne against 0: the IR checks expect one pair of align-1 i128 loads and a single icmp ne i128 (the zext result is not used by the ret).
+; X64-LABEL: define i1 @length16_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-SSE41-LABEL: define i1 @length16_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]]
+; NOTE(review): the X64-AVX and X64-MIC-AVX lines below check assembly (vmovdqu/vptest/retq), unlike the IR checks above; they look like leftovers from an llc-based version of this test -- confirm whether any RUN line still uses these prefixes.
+; X64-AVX-LABEL: length16_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length16_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length16_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length16_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length16_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length16_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+; NOTE(review): the bare X64-SSE2 block-label checks below have no accompanying -LABEL/-SAME/-NEXT lines in this function; they look like leftovers from an earlier check generation -- confirm against the RUN lines.
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+; Input IR: a 16-byte memcmp whose result is only tested with icmp slt 0. The checks above expect two 8-byte load/bswap compare blocks, with res_block selecting -1 or 1 via icmp ult.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length16_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length16_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length16_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length16_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind {
+; Compares 16 bytes at %X against @.str with memcmp and returns whether the result is zero; the IR checks below expect a single align-1 i128 load compared with a constant instead of the call.
+; X64-LABEL: define i1 @length16_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length16_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length16_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length16_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length16_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length16_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length16_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+; NOTE(review): the X64-AVX and X64-MIC-AVX blocks below check x86 assembly rather than IR, unlike the blocks above; they look like leftovers from an llc-based version of this test - confirm whether any RUN line still uses these prefixes.
+; X64-AVX-LABEL: length16_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length16_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length24(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length24(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length24(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length24(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length24(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length24(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length24(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length24(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length24(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length24(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind {
+; Equality-only use of a 24-byte memcmp (result compared with 0); the IR checks expect an i128 load plus a zero-extended i64 load at offset 16 from each pointer, xor/or-combined into a single compare against zero.
+; X64-LABEL: define i1 @length24_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length24_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+; NOTE(review): the remaining checks are x86 assembly rather than IR and look like leftovers from an llc-based version of this test; confirm whether any RUN line still enables these two prefixes.
+; X64-AVX-LABEL: length24_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length24_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1
+; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length24_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length24_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length24_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length24_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind {
+; Equality-only use (icmp ne 0) of a 24-byte memcmp of %X against the constant @.str; the IR checks below expect an i128 load plus an i64 load at offset 16, each xor'ed with a constant, or'ed together and compared against zero.
+; X64-LABEL: define i1 @length24_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-NEXT: ret i1 [[TMP8]]
+;
+; X64-SSE41-LABEL: define i1 @length24_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-SSE41-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-SSE41-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-SSE41-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length24_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-LABEL: define i1 @length24_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length24_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-LABEL: define i1 @length24_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length24_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]]
+; NOTE(review): the X64-AVX and X64-MIC-AVX lines that follow match llc assembly (vmovdqu, vptest, retq), not the IR matched by every group above; they look like leftovers from the old CodeGen test -- confirm against the RUN lines that these two prefixes are unused, then drop them.
+; X64-AVX-LABEL: length24_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length24_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length31(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length31(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length31(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length31(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length31(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length31(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length31(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length31(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length31(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length31(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length31(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb3:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
+ ret i32 %m
+}
+
+define i1 @length31_eq(ptr %x, ptr %y) nounwind {
+; memcmp(x, y, 31) whose result is only compared for equality with 0. The IR checks below expect two overlapping 16-byte (i128) loads per pointer, at offsets 0 and 15, merged with xor/or and tested against zero. NOTE(review): the two assembly-syntax check groups at the end of this function (vmovdqu/vptest and vpcmpneqd/kortestw) do not match the IR-style checks and look like leftovers from an earlier llc-based version of this test - confirm whether any RUN line still uses those prefixes.
+; X64-LABEL: define i1 @length31_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX-LABEL: length31_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length31_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length31_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb3:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length31_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb3:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+; Equality-only memcmp of 31 bytes in a function carrying "prefer-vector-width"="128".
+; Every check prefix below expects the same branch-free expansion, namely two
+; overlapping i128 loads per operand (byte offsets 0 and 15) whose xor results are
+; or-ed together and tested against zero once, with no call to memcmp remaining.
+define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+;
+; X64-LABEL: define i1 @length31_eq_prefer128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length31_eq_prefer128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_eq_prefer128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_eq_prefer128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_prefer128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; 31-byte memcmp of %X against the global @.str, reduced to a zero/non-zero test.
+; Despite the "_eq" name, the body compares the result with icmp ne, so the
+; function returns true when the buffers differ. Every check prefix below expects
+; two overlapping i128 loads from %X (byte offsets 0 and 15) xor-ed with i128
+; immediates (presumably the bytes of @.str folded at expansion time), or-ed
+; together and tested against zero. The expected zext result is not used by the
+; returned value.
+define i1 @length31_eq_const(ptr %X) nounwind {
+;
+; X64-LABEL: define i1 @length31_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-NEXT: ret i1 [[TMP7]]
+;
+; X64-SSE41-LABEL: define i1 @length31_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i1 @length31_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX2-LABEL: define i1 @length31_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length31_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512BW-LABEL: define i1 @length31_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length31_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX512F-LABEL: define i1 @length31_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP7]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length31_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP7]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length32(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-SSE41-LABEL: define i32 @length32(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length32(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length32(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length32(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512BW-LABEL: define i32 @length32(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length32(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX512F-LABEL: define i32 @length32(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length32(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length32(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb3:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+; Equality-only use of a 32-byte memcmp (the call result is compared against
+; zero and nothing else). The first two check groups below expect two i128
+; load pairs combined with xor/or and a single compare against zero; the
+; remaining IR check groups expect one i256 load pair compared directly.
+define i1 @length32_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length32_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; NOTE(review): the two groups below are x86 assembly checks rather than IR
+; checks, unlike every other group in this function. They look like leftovers
+; from an llc-based version of this test; confirm against the RUN lines
+; whether their prefixes are still in use, and drop them if not.
+; X64-AVX512-LABEL: length32_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length32_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length32_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb3:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length32_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X64-SSE41: res_block:
+; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-SSE41: loadbb:
+; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-SSE41: loadbb1:
+; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb2:
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-SSE41: loadbb3:
+; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-SSE41: endblock:
+; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb2:
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX1: loadbb3:
+; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb2:
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX2: loadbb3:
+; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW-256: res_block:
+; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW-256: loadbb:
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW-256: loadbb1:
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb2:
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: loadbb3:
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW-256: endblock:
+; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512BW: res_block:
+; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512BW: loadbb:
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512BW: loadbb1:
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb2:
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512BW: loadbb3:
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512BW: endblock:
+; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F-256: res_block:
+; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F-256: loadbb:
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F-256: loadbb1:
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb2:
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F-256: loadbb3:
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F-256: endblock:
+; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX512F: res_block:
+; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX512F: loadbb:
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX512F: loadbb1:
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb2:
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-AVX512F: loadbb3:
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX512F: endblock:
+; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX2: res_block:
+; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX2: loadbb:
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX2: loadbb1:
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb2:
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: loadbb3:
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX2: endblock:
+; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]]
+; X64-MIC-AVX512F: res_block:
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ]
+; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ]
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb:
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-MIC-AVX512F: loadbb1:
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb2:
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]])
+; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]])
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: loadbb3:
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24
+; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]])
+; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]])
+; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]]
+; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-MIC-AVX512F: endblock:
+; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; X64-SSE2: res_block:
+
+
+
+
+
+; X64-SSE2: loadbb:
+
+
+
+
+
+
+; X64-SSE2: loadbb1:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb2:
+
+
+
+
+
+
+
+
+; X64-SSE2: loadbb3:
+
+
+
+
+
+
+
+
+; X64-SSE2: endblock:
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+;
+; X64-LABEL: define i1 @length32_eq_prefer128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length32_eq_prefer128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_eq_prefer128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_eq_prefer128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_prefer128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX-LABEL: length32_eq_prefer128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length32_eq_prefer128:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind {
+;
+; X64-LABEL: define i1 @length32_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-NEXT: ret i1 [[TMP7]]
+;
+; X64-SSE41-LABEL: define i1 @length32_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length32_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512BW-LABEL: define i1 @length32_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length32_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512F-LABEL: define i1 @length32_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX512-LABEL: length32_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length32_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; X64-MIC-AVX-NEXT: kortestw %k0, %k0
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length48(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length48(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length48(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length48(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length48(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length48(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length48(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length48(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length48(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length48(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length48(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length48_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512-LABEL: length48_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length48_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length48_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length48_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; Equality-only memcmp of 48 bytes under "prefer-vector-width"="128". Every IR check group below expects three i128 load pairs (offsets 0, 16, 32) that are XOR'd, OR-reduced and compared against zero.
+; X64-LABEL: define i1 @length48_eq_prefer128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length48_eq_prefer128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_eq_prefer128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_eq_prefer128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_prefer128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+; NOTE(review): the two groups below (X64-AVX, X64-MIC-AVX) hold x86 assembly rather than IR and look like leftovers from the llc-based version of this test; confirm a RUN line still uses these prefixes or drop them.
+; X64-AVX-LABEL: length48_eq_prefer128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
+; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor 32(%rsi), %xmm2, %xmm1
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-MIC-AVX-LABEL: length48_eq_prefer128:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm3
+; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm4
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm5
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm4, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm0, %k1
+; X64-MIC-AVX-NEXT: korw %k0, %k1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm5, %zmm2, %k1
+; X64-MIC-AVX-NEXT: kortestw %k1, %k0
+; X64-MIC-AVX-NEXT: sete %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_const(ptr %X) nounwind {
+; Inequality test (icmp ne) of 48 bytes at %X against the constant @.str. The X64 and X64-SSE41 groups expect three i128 loads XOR'd with immediates; all other IR groups expect one i256 load plus one i128 load zero-extended to i256.
+; X64-LABEL: define i1 @length48_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0
+; X64-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-NEXT: ret i1 [[TMP11]]
+;
+; X64-SSE41-LABEL: define i1 @length48_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0
+; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP11]]
+;
+; X64-AVX1-LABEL: define i1 @length48_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX1-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX2-LABEL: define i1 @length48_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length48_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512BW-LABEL: define i1 @length48_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length48_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX512F-LABEL: define i1 @length48_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length48_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]]
+; NOTE(review): the X64-AVX512 and X64-MIC-AVX groups below hold x86 assembly rather than IR and look like leftovers from the llc-based version of this test; confirm a RUN line still uses these prefixes or drop them.
+; X64-AVX512-LABEL: length48_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length48_eq_const:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length63(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length63(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length63(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length63(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length63(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length63(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length63(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length63(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length63(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length63(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length63(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+; Three-way memcmp of 63 bytes whose i32 result is returned directly. Every check group above expects the call to @memcmp to remain in place rather than be expanded into loads.
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
+ ret i32 %m
+}
+
+define i1 @length63_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length63_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47
+; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-NEXT: ret i1 [[TMP22]]
+;
+; X64-SSE41-LABEL: define i1 @length63_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47
+; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX1-LABEL: define i1 @length63_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length63_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512-LABEL: length63_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+; X64-MIC-AVX-LABEL: length63_eq:
+; X64-MIC-AVX: # %bb.0:
+; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
+; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2
+; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX-NEXT: setne %al
+; X64-MIC-AVX-NEXT: vzeroupper
+; X64-MIC-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; memcmp over 63 bytes where only the sign of the result is consumed
+; (signed "less than zero"). Every configuration asserted below expects the
+; libcall to be kept as-is, i.e. no inline load/compare sequence is emitted.
+define i1 @length63_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length63_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length63_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length63_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length63_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+; memcmp over 63 bytes where only the sign of the result is consumed
+; (signed "greater than zero"). Every configuration asserted below expects
+; the libcall to be kept as-is, i.e. no inline load/compare sequence is
+; emitted.
+define i1 @length63_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length63_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length63_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length63_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length63_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+; Equality-only memcmp of 63 bytes against the constant string @.str.
+; Expected expansion, as asserted in the IR checks that follow.
+;  - The baseline and SSE4.1 configurations use four i128 loads at offsets
+;    0, 16, 32 and 47 (the last load overlaps the previous chunk so that
+;    exactly 63 bytes are covered).
+;  - The AVX and newer configurations use two i256 loads at offsets 0 and 31.
+; Each loaded chunk is xor'ed with the matching constant, the xors are or'ed
+; together and the combined value is compared against zero.
+define i1 @length63_eq_const(ptr %X) nounwind {
+;
+; X64-LABEL: define i1 @length63_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215
+; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length63_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215
+; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length63_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length63_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length63_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length63_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length63_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length63_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length63_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length63_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp over 64 bytes whose full i32 result is returned to the caller.
+; Every configuration asserted below expects the libcall to be kept as-is,
+; i.e. no inline load/compare sequence is emitted.
+define i32 @length64(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length64(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length64(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length64(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length64(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length64(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length64(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length64(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length64(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length64(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length64(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind {
+;
+; X64-LABEL: define i1 @length64_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-NEXT: ret i1 [[TMP22]]
+;
+; X64-SSE41-LABEL: define i1 @length64_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1
+; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48
+; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1
+; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1
+; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]]
+; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]]
+; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]]
+; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0
+; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-SSE41-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX512-LABEL: length64_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
+; X64-AVX512-NEXT: kortestw %k0, %k0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; Test length64_lt - a 64-byte memcmp whose result only feeds a signed
+; "less than zero" compare.
+; Every check prefix below (X64, X64-SSE41, X64-AVX1, X64-AVX2,
+; X64-AVX512BW-256, X64-AVX512BW, X64-AVX512F-256, X64-AVX512F, X64-MIC-AVX2,
+; X64-MIC-AVX512F) expects the same output - the memcmp call is kept as a call
+; (no inline load/compare expansion) and is followed by the original icmp slt.
+; NOTE(review): the run of empty lines just before the IR body carries no checks;
+; presumably left behind when older check blocks were removed - confirm.
+define i1 @length64_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length64_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length64_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length64_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+; Test length64_gt - a 64-byte memcmp whose result only feeds a signed
+; "greater than zero" compare.
+; Every check prefix below (X64, X64-SSE41, X64-AVX1, X64-AVX2,
+; X64-AVX512BW-256, X64-AVX512BW, X64-AVX512F-256, X64-AVX512F, X64-MIC-AVX2,
+; X64-MIC-AVX512F) expects the same output - the memcmp call is kept as a call
+; (no inline load/compare expansion) and is followed by the original icmp sgt.
+; NOTE(review): the run of empty lines just before the IR body carries no checks;
+; presumably left behind when older check blocks were removed - confirm.
+define i1 @length64_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length64_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length64_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length64_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+; Test length64_eq_const - a 64-byte memcmp of %X against the global @.str,
+; with the result only tested for equality with zero.
+; The checks below expect the call to be replaced by inline loads in one of
+; three shapes, depending on the prefix:
+;   * X64 and X64-SSE41 - four i128 loads of X (offsets 0, 16, 32, 48), each
+;     xor'ed with an immediate constant, or-reduced and compared with zero.
+;   * X64-AVX1, X64-AVX2, X64-AVX512BW-256, X64-AVX512F-256 and X64-MIC-AVX2 -
+;     two i256 loads of X (offsets 0 and 32), each xor'ed with an immediate
+;     constant, or'ed together and compared with zero.
+;   * X64-AVX512BW, X64-AVX512F and X64-MIC-AVX512F - one i512 load of X and
+;     one i512 load from @.str, compared directly with icmp ne.
+; In every shape the icmp ne result is zero-extended to i32 and then compared
+; with "icmp eq ..., 0", mirroring the original %c compare.
+; The immediate constants are presumably the folded bytes of @.str; its
+; initializer is not visible here - TODO confirm.
+; NOTE(review): the last check group (the X64-AVX512 prefix) matches an assembly
+; label and x86 instructions rather than IR. It looks like a leftover from an
+; llc-based version of this test; confirm whether any RUN line still uses that
+; prefix and drop the group if not.
+define i1 @length64_eq_const(ptr %X) nounwind {
+;
+; X64-LABEL: define i1 @length64_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736
+; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length64_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690
+; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48
+; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1
+; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736
+; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]]
+; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]]
+; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0
+; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length64_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length64_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length64_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length64_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length64_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-AVX512-LABEL: length64_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0
+; X64-AVX512-NEXT: kortestw %k0, %k0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Test length96 - a 96-byte memcmp whose full i32 result is returned.
+; Every check prefix below (X64, X64-SSE41, X64-AVX1, X64-AVX2,
+; X64-AVX512BW-256, X64-AVX512BW, X64-AVX512F-256, X64-AVX512F, X64-MIC-AVX2,
+; X64-MIC-AVX512F) expects the memcmp call to be kept as a call (no inline
+; expansion) and its result returned directly.
+; NOTE(review): the run of empty lines just before the IR body carries no checks;
+; presumably left behind when older check blocks were removed - confirm.
+define i32 @length96(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length96(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length96(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length96(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length96(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length96(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length96(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length96(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length96(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length96(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length96(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
+ ret i32 %m
+}
+
+; Test length96_eq - a 96-byte memcmp whose result is only tested with
+; "icmp ne ..., 0".
+; Expected output by check prefix:
+;   * X64 and X64-SSE41 - the memcmp call is kept, followed by the original
+;     icmp ne.
+;   * X64-AVX1, X64-AVX2, X64-AVX512BW-256, X64-AVX512F-256 and X64-MIC-AVX2 -
+;     three pairs of i256 loads (offsets 0, 32, 64), each pair xor'ed, the three
+;     results or'ed together and compared with zero.
+;   * X64-AVX512BW, X64-AVX512F and X64-MIC-AVX512F - one pair of i512 loads at
+;     offset 0 plus one pair of i256 loads at offset 64 that are zero-extended
+;     to i512; the two xor results are or'ed and compared with zero.
+; In the expanded shapes the function returns the icmp ne result itself; the
+; zext-to-i32 of that result is still present in the checks but is not the value
+; returned.
+; NOTE(review): the first check group (the X64-SSE prefix) matches an assembly
+; label and x86 instructions rather than IR. It looks like a leftover from an
+; llc-based version of this test; confirm whether any RUN line still uses that
+; prefix and drop the group if not.
+define i1 @length96_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length96_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $96, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length96_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length96_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length96_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX2-LABEL: define i1 @length96_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]]
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]]
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP16]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]]
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length96_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length96_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length96_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length96_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Sign test (slt 0) on the result of a 96-byte memcmp.
+; Every check group above expects the memcmp call to be kept unexpanded.
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length96_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length96_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length96_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length96_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Sign test (sgt 0) on the result of a 96-byte memcmp.
+; Every check group above expects the memcmp call to be kept unexpanded.
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length96_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $96, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+; NOTE(review): the assembly-style SSE checks above (pushq/callq/retq) look like leftovers from an assembly-output test; every other check group in this function matches IR. Confirm against the RUN lines before deleting them.
+; 96-byte equality compare of %X against @.str. The baseline and SSE4.1 check groups keep the memcmp call.
+; X64-LABEL: define i1 @length96_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length96_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length96_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length96_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length96_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length96_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length96_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length96_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length96_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length96_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+; Expanded shapes above: the i256 groups use three loads of %X (offsets 0, 32, 64) xor'ed with immediates; the i512 groups use one i512 load of %X and of @.str plus a zero-extended i256 tail at offset 64.
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length127(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length127(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length127(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length127(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length127(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length127(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length127(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length127(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length127(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length127(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length127(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+; Returns the raw i32 result of a 127-byte memcmp.
+; Every check group above expects the memcmp call to be kept unexpanded.
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
+ ret i32 %m
+}
+
+define i1 @length127_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length127_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $127, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+; NOTE(review): the assembly-style SSE checks above (pushq/callq/retq) look like leftovers from an assembly-output test; every other check group in this function matches IR. Confirm against the RUN lines before deleting them.
+; Zero test on a 127-byte memcmp; despite the _eq suffix the IR below uses icmp ne. The baseline and SSE4.1 check groups keep the memcmp call.
+; X64-LABEL: define i1 @length127_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length127_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length127_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX2-LABEL: define i1 @length127_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+; Expanded shapes above: the i256 groups load at offsets 0, 32, 64 and 95 (the last load overlaps the previous one so it ends at byte 127); the i512 groups load at offsets 0 and 63.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length127_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length127_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length127_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length127_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Sign test (slt 0) on the result of a 127-byte memcmp.
+; Every check group above expects the memcmp call to be kept unexpanded.
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length127_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length127_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length127_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length127_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+; Sign test (memcmp result > 0) over 127 bytes. Every check prefix above expects
+; the memcmp call to remain a call, followed by the same icmp sgt.
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length127_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $127, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+; NOTE(review): the X64-SSE checks above are x86 assembly while every other check here is IR; they look stale, so confirm a RUN line still uses that prefix.
+; Equality test of 127 bytes against @.str. Checks below keep the call for X64 and X64-SSE41 and expect four overlapping i256 loads (offsets 0/32/64/95) or two overlapping i512 loads (offsets 0/63) for the remaining prefixes.
+; X64-LABEL: define i1 @length127_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length127_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length127_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length127_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length127_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length127_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length127_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length127_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length127_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length127_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length128(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length128(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length128(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length128(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length128(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length128(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length128(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length128(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length128(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length128(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length128(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+; Returns the raw i32 result of a 128-byte memcmp. Every check prefix above
+; expects the memcmp call to remain a call whose result is returned directly.
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
+ ret i32 %m
+}
+
+define i1 @length128_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length128_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $128, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+; NOTE(review): the X64-SSE checks above are x86 assembly while every other check here is IR; they look stale, so confirm a RUN line still uses that prefix.
+; Despite the _eq name the IR tests memcmp(x, y, 128) != 0. Checks below keep the call for X64 and X64-SSE41 and expect four i256 or two i512 load pairs, xor'ed and or-reduced, for the remaining prefixes.
+; X64-LABEL: define i1 @length128_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length128_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length128_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX2-LABEL: define i1 @length128_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-256-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0
+; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length128_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length128_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length128_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length128_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length128_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length128_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length128_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length128_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length128_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $128, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length128_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length128_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length128_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length128_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length128_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length128_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length128_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length128_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length128_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820
+; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96
+; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1
+; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934
+; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]]
+; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]]
+; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0
+; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length128_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length192(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length192(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length192(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length192(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length192(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length192(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length192(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length192(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length192(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length192(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length192_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $192, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length192_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length192_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length192_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length192_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP16]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP16]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP16]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length192_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length192_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length192_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length192_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length192_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length192_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length192_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length192_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length192_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $192, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length192_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length192_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length192_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length192_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length192_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length192_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length192_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length192_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length192_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length192_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length255(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length255(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length255(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length255(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length255(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length255(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length255(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length255(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length255(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length255(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length255_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $255, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length255_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length255_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length255_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length255_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length255_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length255_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length255_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length255_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length255_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length255_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length255_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length255_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length255_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $255, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length255_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length255_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length255_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length255_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length255_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length255_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length255_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length255_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length255_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length255_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length256(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length256(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length256(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length256(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length256(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length256(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length256(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length256(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length256(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length256(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(ptr %x, ptr %y) nounwind {
+; X64-SSE-LABEL: length256_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $256, %edx # imm = 0x100
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length256_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length256_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length256_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length256_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512BW-NEXT: ret i1 [[TMP22]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]]
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]]
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]]
+; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]]
+; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32
+; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length256_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length256_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length256_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length256_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length256_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length256_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length256_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length256_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_eq_const(ptr %X) nounwind {
+; X64-SSE-LABEL: length256_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $256, %edx # imm = 0x100
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+;
+; X64-LABEL: define i1 @length256_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length256_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length256_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length256_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length256_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length256_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1
+; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length256_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length256_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1
+; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length256_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length256_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1
+; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]]
+; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64
+; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]]
+; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128
+; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]]
+; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192
+; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1
+; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1
+; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]]
+; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]]
+; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]]
+; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]]
+; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0
+; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length384(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length384(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length384(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length384(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length384(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length384(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length384(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length384(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length384(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length384(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length384(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind
+ ret i32 %m
+}
+
+define i1 @length384_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length384_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length384_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length384_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length384_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length384_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length384_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length384_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length384_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length384_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length384_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length384_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length384_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; Input IR: a 384-byte memcmp of %x and %y whose result is tested with `icmp sgt ..., 0`.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length384_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length384_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length384_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length384_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length384_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length384_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length384_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length384_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length384_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length384_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+; Input IR: a 384-byte memcmp of %X against the global @.str (defined outside this
+; function), tested with `icmp eq ..., 0`. All ten check blocks keep the memcmp call.
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length511(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length511(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length511(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length511(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length511(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length511(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length511(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length511(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length511(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length511(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length511(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+; Input IR: returns the raw i32 result of a 511-byte memcmp of %X and %Y.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind
+ ret i32 %m
+}
+
+define i1 @length511_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length511_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length511_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length511_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length511_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+; Input IR: a 511-byte memcmp of %x and %y whose result is compared against zero.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+; NOTE(review): despite the `_eq` suffix the predicate is `icmp ne` - confirm this is intended.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length511_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length511_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length511_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length511_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; Input IR: a 511-byte memcmp of %x and %y whose result is tested with `icmp slt ..., 0`.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length511_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length511_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length511_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length511_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; Input IR: a 511-byte memcmp of %x and %y whose result is tested with `icmp sgt ..., 0`.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length511_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length511_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length511_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length511_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length511_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length511_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length511_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length511_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length511_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length511_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+
+
+
+; Input IR: a 511-byte memcmp of %X against the global @.str (defined outside this
+; function), tested with `icmp eq ..., 0`. All ten check blocks keep the memcmp call.
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length512(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @length512(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @length512(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length512(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length512(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @length512(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @length512(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @length512(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @length512(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @length512(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @length512(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+; Input IR: returns the raw i32 result of a 512-byte memcmp of %X and %Y.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind
+ ret i32 %m
+}
+
+define i1 @length512_eq(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length512_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length512_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length512_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length512_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+; Input IR: a 512-byte memcmp of %x and %y whose result is compared against zero.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+; NOTE(review): despite the `_eq` suffix the predicate is `icmp ne` - confirm this is intended.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_lt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length512_lt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length512_lt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length512_lt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length512_lt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_lt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_lt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_lt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_lt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_lt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_lt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+
+
+
+; Input IR: a 512-byte memcmp of %x and %y whose result is tested with `icmp slt ..., 0`.
+; All ten check blocks in this function expect the memcmp call to be left in place.
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_gt(ptr %x, ptr %y) nounwind {
+; X64-LABEL: define i1 @length512_gt(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-SSE41-LABEL: define i1 @length512_gt(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-SSE41-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length512_gt(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length512_gt(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_gt(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_gt(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512BW-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_gt(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_gt(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_gt(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_gt(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]]
+;
+; Summary: every check prefix above expects the 512-byte memcmp call and the "sgt 0" test on its result to be left unexpanded.
+
+
+
+
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_eq_const(ptr %X) nounwind {
+; X64-LABEL: define i1 @length512_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @length512_eq_const(
+; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length512_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length512_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @length512_eq_const(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @length512_eq_const(
+; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @length512_eq_const(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @length512_eq_const(
+; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @length512_eq_const(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @length512_eq_const(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; Summary: 512-byte equality test of %X against the global @.str; every check prefix above expects the memcmp call to be left unexpanded.
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks that we do not do stupid things with huge sizes: the memcmp of 9223372036854775807 (2^63 - 1) bytes must be left as a plain call under every check prefix below.
+define i32 @huge_length(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i32 @huge_length(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @huge_length(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @huge_length(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @huge_length(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @huge_length(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @huge_length(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @huge_length(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @huge_length(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @huge_length(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @huge_length(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
+ ret i32 %m
+}
+
+define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
+; X64-LABEL: define i1 @huge_length_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @huge_length_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @huge_length_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @huge_length_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @huge_length_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @huge_length_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @huge_length_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @huge_length_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @huge_length_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @huge_length_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; Summary: equality-only use of the 9223372036854775807-byte (2^63 - 1) memcmp; every check prefix above expects the call to be left unexpanded.
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks non-constant sizes: with the length passed as the runtime argument %size, the memcmp call must be left unexpanded under every check prefix below.
+define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind {
+; X64-LABEL: define i32 @nonconst_length(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-SSE41-LABEL: define i32 @nonconst_length(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-SSE41-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @nonconst_length(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @nonconst_length(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-256-LABEL: define i32 @nonconst_length(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512BW-LABEL: define i32 @nonconst_length(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-256-LABEL: define i32 @nonconst_length(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: ret i32 [[M]]
+;
+; X64-AVX512F-LABEL: define i32 @nonconst_length(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX2-LABEL: define i32 @nonconst_length(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: ret i32 [[M]]
+;
+; X64-MIC-AVX512F-LABEL: define i32 @nonconst_length(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: ret i32 [[M]]
+;
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
+ ret i32 %m
+}
+
+define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind {
+; X64-LABEL: define i1 @nonconst_length_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-SSE41-LABEL: define i1 @nonconst_length_eq(
+; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-SSE41-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-256-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512BW-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512BW-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-256-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-256-NEXT: ret i1 [[C]]
+;
+; X64-AVX512F-LABEL: define i1 @nonconst_length_eq(
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX512F-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX2-LABEL: define i1 @nonconst_length_eq(
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX2-NEXT: ret i1 [[C]]
+;
+; X64-MIC-AVX512F-LABEL: define i1 @nonconst_length_eq(
+; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-MIC-AVX512F-NEXT: ret i1 [[C]]
+;
+; Summary: equality-only use of a memcmp whose length is the runtime argument %size; every check prefix above expects the call to be left unexpanded.
+
+
+
+
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll
new file mode 100644
index 00000000000000..1ad91adb9e533e
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64_1LD
+; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64_2LD
+
+
+declare signext i32 @memcmp(ptr %src1, ptr %src2, i64 %size)
+
+; Zero-length comparison: the call is marked nobuiltin, so the check lines below expect it to be left in place rather than optimized away.
+define i32 @f1(ptr %src1, ptr %src2) {
+; X64-LABEL: define i32 @f1(
+; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 0) #[[ATTR0:[0-9]+]]
+; X64-NEXT: ret i32 [[RES]]
+;
+; X64_1LD-LABEL: define i32 @f1(
+; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 0) #[[ATTR0:[0-9]+]]
+; X64_1LD-NEXT: ret i32 [[RES]]
+;
+; X64_2LD-LABEL: define i32 @f1(
+; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 0) #[[ATTR0:[0-9]+]]
+; X64_2LD-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 0) nobuiltin
+ ret i32 %res
+}
+
+; Check a case where the result is used as an integer: the 2-byte nobuiltin memcmp call is expected to be returned unchanged.
+define i32 @f2(ptr %src1, ptr %src2) {
+; X64-LABEL: define i32 @f2(
+; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 2) #[[ATTR0]]
+; X64-NEXT: ret i32 [[RES]]
+;
+; X64_1LD-LABEL: define i32 @f2(
+; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 2) #[[ATTR0]]
+; X64_1LD-NEXT: ret i32 [[RES]]
+;
+; X64_2LD-LABEL: define i32 @f2(
+; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
+; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 2) #[[ATTR0]]
+; X64_2LD-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 2) nobuiltin
+ ret i32 %res
+}
+
+; Check a case where the result is tested for equality: the 3-byte nobuiltin memcmp call and the branch on "eq 0" are expected to be left unchanged.
+define void @f3(ptr %src1, ptr %src2, ptr %dest) {
+; X64-LABEL: define void @f3(
+; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 3) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0
+; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64: store:
+; X64-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64-NEXT: br label [[EXIT]]
+; X64: exit:
+; X64-NEXT: ret void
+;
+; X64_1LD-LABEL: define void @f3(
+; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 3) #[[ATTR0]]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0
+; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_1LD: store:
+; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_1LD-NEXT: br label [[EXIT]]
+; X64_1LD: exit:
+; X64_1LD-NEXT: ret void
+;
+; X64_2LD-LABEL: define void @f3(
+; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 3) #[[ATTR0]]
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0
+; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_2LD: store:
+; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_2LD-NEXT: br label [[EXIT]]
+; X64_2LD: exit:
+; X64_2LD-NEXT: ret void
+;
+ %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 3) nobuiltin
+ %cmp = icmp eq i32 %res, 0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store i32 0, ptr %dest
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check a case where the result is tested for inequality: the 4-byte nobuiltin memcmp call and the branch on "ne 0" are expected to be left unchanged.
+define void @f4(ptr %src1, ptr %src2, ptr %dest) {
+; X64-LABEL: define void @f4(
+; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64-NEXT: entry:
+; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 4) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0
+; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64: store:
+; X64-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64-NEXT: br label [[EXIT]]
+; X64: exit:
+; X64-NEXT: ret void
+;
+; X64_1LD-LABEL: define void @f4(
+; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_1LD-NEXT: entry:
+; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 4) #[[ATTR0]]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0
+; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_1LD: store:
+; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_1LD-NEXT: br label [[EXIT]]
+; X64_1LD: exit:
+; X64_1LD-NEXT: ret void
+;
+; X64_2LD-LABEL: define void @f4(
+; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_2LD-NEXT: entry:
+; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 4) #[[ATTR0]]
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0
+; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_2LD: store:
+; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_2LD-NEXT: br label [[EXIT]]
+; X64_2LD: exit:
+; X64_2LD-NEXT: ret void
+;
+entry:
+ %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 4) nobuiltin
+ %cmp = icmp ne i32 %res, 0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store i32 0, ptr %dest
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check a case where the result is tested via slt: the 5-byte nobuiltin memcmp call and the branch on "slt 0" are expected to be left unchanged.
+define void @f5(ptr %src1, ptr %src2, ptr %dest) {
+; X64-LABEL: define void @f5(
+; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64-NEXT: entry:
+; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 5) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[RES]], 0
+; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64: store:
+; X64-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64-NEXT: br label [[EXIT]]
+; X64: exit:
+; X64-NEXT: ret void
+;
+; X64_1LD-LABEL: define void @f5(
+; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_1LD-NEXT: entry:
+; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 5) #[[ATTR0]]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp slt i32 [[RES]], 0
+; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_1LD: store:
+; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_1LD-NEXT: br label [[EXIT]]
+; X64_1LD: exit:
+; X64_1LD-NEXT: ret void
+;
+; X64_2LD-LABEL: define void @f5(
+; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_2LD-NEXT: entry:
+; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 5) #[[ATTR0]]
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp slt i32 [[RES]], 0
+; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_2LD: store:
+; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_2LD-NEXT: br label [[EXIT]]
+; X64_2LD: exit:
+; X64_2LD-NEXT: ret void
+;
+entry:
+ %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 5) nobuiltin
+ %cmp = icmp slt i32 %res, 0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store i32 0, ptr %dest
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check a case where the result is tested for sgt: the 6-byte nobuiltin memcmp call and the branch on "sgt 0" are expected to be left unchanged.
+define void @f6(ptr %src1, ptr %src2, ptr %dest) {
+; X64-LABEL: define void @f6(
+; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64-NEXT: entry:
+; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 6) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RES]], 0
+; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64: store:
+; X64-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64-NEXT: br label [[EXIT]]
+; X64: exit:
+; X64-NEXT: ret void
+;
+; X64_1LD-LABEL: define void @f6(
+; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_1LD-NEXT: entry:
+; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 6) #[[ATTR0]]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RES]], 0
+; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_1LD: store:
+; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_1LD-NEXT: br label [[EXIT]]
+; X64_1LD: exit:
+; X64_1LD-NEXT: ret void
+;
+; X64_2LD-LABEL: define void @f6(
+; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) {
+; X64_2LD-NEXT: entry:
+; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 6) #[[ATTR0]]
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RES]], 0
+; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]]
+; X64_2LD: store:
+; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4
+; X64_2LD-NEXT: br label [[EXIT]]
+; X64_2LD: exit:
+; X64_2LD-NEXT: ret void
+;
+entry:
+ %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 6) nobuiltin
+ %cmp = icmp sgt i32 %res, 0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store i32 0, ptr %dest
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll
new file mode 100644
index 00000000000000..b36c0db432820d
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll
@@ -0,0 +1,870 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2
+
+; This tests the IR-level expansion of memcmp performed by the expand-memcmp pass (run through opt)
+; rdar://6480398
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i32)
+declare dso_local i32 @bcmp(ptr, ptr, i32)
+
+define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i32 @length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length2_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind optsize {
+; X86-LABEL: define i1 @length2_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length3(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length3(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length3_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length3_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length4(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE2-LABEL: define i32 @length4(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE2-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length4_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind optsize {
+; X86-LABEL: define i1 @length4_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length5(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length5(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length5_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length5_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length8(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length8(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length8_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind optsize {
+; X86-LABEL: define i1 @length8_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i1 @length12_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length12_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length12(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length12(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length16(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length16(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
+ ret i32 %m
+}
+
+; Equality-only 16-byte memcmp under optsize. The run with -mattr=cmov is
+; expected to keep the libcall; the run with -mattr=+sse2 is expected to expand
+; it into one i128 load per operand followed by a single compare.
+define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize {
+; X86-LABEL: define i1 @length16_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; Equality-only 16-byte memcmp of %X against the constant string @.str under
+; optsize. The run with -mattr=cmov is expected to keep the libcall; the run
+; with -mattr=+sse2 is expected to load %X as an i128 and compare it against
+; an i128 immediate.
+define i1 @length16_eq_const(ptr %X) nounwind optsize {
+; X86-LABEL: define i1 @length16_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length24(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length24(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
+ ret i32 %m
+}
+
+; Equality-only 24-byte memcmp under optsize. The run with -mattr=cmov is
+; expected to keep the libcall; the run with -mattr=+sse2 is expected to use
+; two i128 loads per operand at byte offsets 0 and 8 (overlapping), xor each
+; pair, or the results together and compare against zero.
+define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize {
+; X86-LABEL: define i1 @length24_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; Inequality test of a 24-byte memcmp of %X against the constant string @.str
+; under optsize. The run with -mattr=cmov is expected to keep the libcall; the
+; run with -mattr=+sse2 is expected to load %X as two i128 values at byte
+; offsets 0 and 8, xor each with an i128 immediate, or the results together
+; and compare against zero.
+define i1 @length24_eq_const(ptr %X) nounwind optsize {
+; X86-LABEL: define i1 @length24_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length32(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length32(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize {
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind optsize {
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @length64(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length64(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize {
+; X86-LABEL: define i1 @length64_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind optsize {
+; X86-LABEL: define i1 @length64_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize {
+; X86-LABEL: define i32 @bcmp_length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: ret i32 [[TMP4]]
+;
+; X86-SSE2-LABEL: define i32 @bcmp_length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i32 [[TMP4]]
+;
+ %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll
new file mode 100644
index 00000000000000..cb6c5e6da1c790
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll
@@ -0,0 +1,1414 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=X64-AVX1
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefix=X64-AVX2
+
+; This tests inline expansion/optimization of memcmp by the expand-memcmp pass (run via opt; formerly done at codegen time)
+; rdar://6480398
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i64)
+declare dso_local i32 @bcmp(ptr, ptr, i64)
+
+define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i32 @length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX2-LABEL: define i32 @length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length2_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind optsize {
+; X64-LABEL: define i1 @length2_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length3(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length3(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length3(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length3_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length3_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length3_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length4(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length4(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length4(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length4_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind optsize {
+; X64-LABEL: define i1 @length4_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length5(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length5(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length5(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length5_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length5_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length5_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way (i32 result) memcmp of 8 bytes in an optsize function.
+; All three check prefixes expect the call to be expanded inline: one i64 load
+; per operand, a bswap of each loaded value, and a result computed as
+; zext(ugt) - zext(ult) on the swapped values.
+define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length8(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length8(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length8(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+; Equality-only use of an 8-byte memcmp (result compared with icmp eq ... 0)
+; in an optsize function. All three check prefixes expect a single i64 load per
+; operand and one icmp ne; the zext of that i1 then feeds the original
+; "icmp eq i32 ..., 0", which is kept as [[C]].
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length8_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; 8-byte memcmp of %X against the global @.str, used only in a comparison with
+; zero. Note that despite the "_eq" suffix the IR tests "icmp ne".
+; All three check prefixes expect one i64 load of %X compared (icmp ne) with
+; an immediate i64 constant, and that i1 returned directly.
+; NOTE(review): the immediate is presumably the first 8 bytes of @.str folded
+; to a constant; @.str is defined outside this function - confirm.
+define i1 @length8_eq_const(ptr %X) nounwind optsize {
+; X64-LABEL: define i1 @length8_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; 12-byte memcmp used only in a comparison with zero (the IR tests "icmp ne"
+; even though the function is named "_eq").
+; All three check prefixes expect a branch-free expansion: an i64 chunk at
+; offset 0 and an i32 chunk at offset 8 (zero-extended to i64), each pair
+; xor-ed, the two xors or-ed together, and the combined value tested against 0.
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i1 @length12_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length12_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length12_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way (i32 result) memcmp of 12 bytes in an optsize function.
+; All three check prefixes expect a two-block expansion:
+;   loadbb   - i64 chunk at offset 0, byte-swapped, compared for equality
+;   loadbb1  - i32 chunk at offset 8, byte-swapped and zero-extended to i64
+;   res_block - reached on the first mismatch; selects -1 or 1 via icmp ult
+;   endblock - phi of 0 (all chunks equal) or the res_block value
+define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length12(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length12(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length12(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+; Three-way (i32 result) memcmp of 16 bytes in an optsize function.
+; All three check prefixes expect a two-block expansion using two i64 chunks
+; (offsets 0 and 8), each byte-swapped and compared for equality; the first
+; mismatch branches to res_block, which selects -1 or 1 via icmp ult, and
+; endblock merges that value with 0 for the all-equal path.
+define i32 @length16(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length16(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length16(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length16(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+; 16-byte memcmp used only in a comparison with zero (the IR tests "icmp ne"
+; even though the function is named "_eq").
+; All three check prefixes expect a single i128 load per operand and one
+; icmp ne whose i1 result is returned directly.
+define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize {
+; X64-LABEL: define i1 @length16_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; 16-byte memcmp of %X against the global @.str, with the result compared
+; "icmp eq ..., 0". All three check prefixes expect one i128 load of %X
+; compared (icmp ne) with an immediate i128 constant; the zext of that i1 then
+; feeds the original "icmp eq i32 ..., 0", which is kept as [[C]].
+; NOTE(review): the immediate is presumably the first 16 bytes of @.str folded
+; to a constant; @.str is defined outside this function - confirm.
+define i1 @length16_eq_const(ptr %X) nounwind optsize {
+; X64-LABEL: define i1 @length16_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+; Three-way (i32 result) memcmp of 24 bytes in an optsize function.
+; All three check prefixes expect the memcmp call to be left in place (no
+; inline expansion): the only expected instructions are the tail call and the
+; return of its result.
+define i32 @length24(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length24(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR3:[0-9]+]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length24(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length24(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+; 24-byte memcmp with the result compared "icmp eq ..., 0".
+; All three check prefixes expect a branch-free expansion: an i128 chunk at
+; offset 0 and an i64 chunk at offset 16 (zero-extended to i128), each pair
+; xor-ed, the two xors or-ed together and tested against 0; the zext of that
+; i1 then feeds the original "icmp eq i32 ..., 0", which is kept as [[CMP]].
+define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize {
+; X64-LABEL: define i1 @length24_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; 24-byte memcmp of %X against the global @.str, used only in a comparison
+; with zero (the IR tests "icmp ne" even though the function is named "_eq").
+; All three check prefixes expect a branch-free expansion: an i128 load at
+; offset 0 and an i64 load at offset 16 (zero-extended to i128), each xor-ed
+; with an immediate constant, the two xors or-ed together and tested against 0.
+; NOTE(review): the immediates are presumably the bytes of @.str folded to
+; constants; @.str is defined outside this function - confirm.
+define i1 @length24_eq_const(ptr %X) nounwind optsize {
+; X64-LABEL: define i1 @length24_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP8]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Three-way (i32 result) memcmp of 32 bytes in an optsize function.
+; All three check prefixes expect the memcmp call to be left in place (no
+; inline expansion): the only expected instructions are the tail call and the
+; return of its result.
+define i32 @length32(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length32(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR3]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length32(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length32(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+; 32-byte memcmp with the result compared "icmp eq ..., 0".
+; The X64 prefix expects two i128 chunks (offsets 0 and 16), each pair xor-ed,
+; the xors or-ed together and tested against 0. The X64-AVX1 and X64-AVX2
+; prefixes instead expect a single i256 load per operand and one icmp ne.
+; In every case the zext of the resulting i1 feeds the original
+; "icmp eq i32 ..., 0", which is kept as [[CMP]].
+define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize {
+; X64-LABEL: define i1 @length32_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind optsize {
+; X64-SSE2-LABEL: length32_eq_const:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand %xmm1, %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-LABEL: define i1 @length32_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @length64(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR3]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length64(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length64(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize {
+; X64-SSE2-LABEL: length64_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $64, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-LABEL: define i1 @length64_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR3]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind optsize {
+; X64-SSE2-LABEL: length64_eq_const:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $.L.str, %esi
+; X64-SSE2-NEXT: movl $64, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-LABEL: define i1 @length64_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR3]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize {
+; X64-LABEL: define i32 @bcmp_length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX1-LABEL: define i32 @bcmp_length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX2-LABEL: define i32 @bcmp_length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i32 [[TMP4]]
+;
+ %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll
new file mode 100644
index 00000000000000..a8b054cd20e270
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll
@@ -0,0 +1,887 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2
+
+; This tests inlining/optimization of memcmp by the expand-memcmp pass (run via opt)
+; rdar://6480398
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i32)
+declare dso_local i32 @bcmp(ptr, ptr, i32)
+
+define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i32 @length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length2_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
+; X86-LABEL: define i1 @length2_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length3(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length3(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length3_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length3_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length4(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE2-LABEL: define i32 @length4(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE2-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length4_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind !prof !14 {
+; X86-LABEL: define i1 @length4_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length5(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length5(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length5_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length5_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length8(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length8(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length8_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind !prof !14 {
+; X86-LABEL: define i1 @length8_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length12_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length12_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length12(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length12(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length16(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length16(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 {
+; X86-NOSSE-LABEL: length16_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $16
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length16_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind !prof !14 {
+; X86-NOSSE-LABEL: length16_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $16
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length16_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length24(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length24(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 {
+; X86-NOSSE-LABEL: length24_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $24
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length24_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind !prof !14 {
+; X86-NOSSE-LABEL: length24_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $24
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length24_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length32(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length32(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 {
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind !prof !14 {
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @length64(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length64(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 {
+; X86-LABEL: define i1 @length64_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind !prof !14 {
+; X86-LABEL: define i1 @length64_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 {
+; X86-LABEL: define i32 @bcmp_length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: ret i32 [[TMP4]]
+;
+; X86-SSE2-LABEL: define i32 @bcmp_length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i32 [[TMP4]]
+;
+ %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i32 10000}
+!4 = !{!"MaxCount", i32 10}
+!5 = !{!"MaxInternalCount", i32 1}
+!6 = !{!"MaxFunctionCount", i32 1000}
+!7 = !{!"NumCounts", i32 3}
+!8 = !{!"NumFunctions", i32 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i32 100, i32 1}
+!12 = !{i32 999000, i32 100, i32 1}
+!13 = !{i32 999999, i32 1, i32 2}
+!14 = !{!"function_entry_count", i32 0}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll
new file mode 100644
index 00000000000000..1507cbdc4e86ec
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll
@@ -0,0 +1,1347 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=X64-AVX1
+; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefix=X64-AVX2
+
+; This tests the inlining/optimization of memcmp done by the expand-memcmp pass
+; rdar://6480398
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i64)
+declare dso_local i32 @bcmp(ptr, ptr, i64)
+
+define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF14:![0-9]+]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i32 @length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: ret i32 [[TMP7]]
+;
+; X64-AVX2-LABEL: define i32 @length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length2_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
+; X64-LABEL: define i1 @length2_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]]
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length3(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length3(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length3(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+; memcmp of 3 bytes whose result is only compared against zero (icmp ne).
+; The X64, X64-AVX1 and X64-AVX2 checks all expect a single-block expansion:
+; an i16 load plus an i8 load (at offset 2) per operand, each pair xor'ed, the
+; two xors or'ed together, and one icmp ne against 0 that is returned directly.
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length3_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length3_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length3_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 4 bytes returning the full three-way i32 result.
+; All three prefixes expect a single-block expansion: one i32 load per operand,
+; both passed through llvm.bswap.i32, then unsigned ugt/ult compares whose
+; zero-extended results are subtracted to form the return value.
+define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length4(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length4(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length4(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+; memcmp of 4 bytes whose result is only compared against zero (icmp ne).
+; All three prefixes expect one i32 load per operand and a single icmp ne on
+; the loaded values, returned directly; no bswap is expected in this form.
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length4_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 4 bytes between %X and the constant bytes at offset 1 of @.str,
+; with the result compared for equality with zero (icmp eq).
+; All three prefixes expect the constant side to appear as the immediate
+; 875770417 in an icmp ne against a single i32 load of %X; the zext'ed result
+; is then still compared with `icmp eq ..., 0` and that value is returned.
+; NOTE(review): @.str is defined outside this view.
+define i1 @length4_eq_const(ptr %X) nounwind !prof !14 {
+; X64-LABEL: define i1 @length4_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length4_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length4_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 5 bytes returning the full three-way i32 result.
+; All three prefixes expect a multi-block expansion:
+;   loadbb    - i32 loads of both operands, bswap'ed and compared for equality;
+;               a mismatch branches to res_block, a match to loadbb1.
+;   res_block - selects -1 or 1 from an unsigned less-than on the bswap'ed words.
+;   loadbb1   - loads the byte at offset 4 from each operand and returns the
+;               difference of the zero-extended bytes.
+;   endblock  - phi merging the results of loadbb1 and res_block.
+define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length5(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length5(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br label [[ENDBLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length5(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br label [[ENDBLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+; memcmp of 5 bytes whose result is only compared against zero (icmp ne).
+; All three prefixes expect a single-block expansion: an i32 load plus an i8
+; load (at offset 4, zero-extended to i32) per operand, each pair xor'ed, the
+; xors or'ed together, and one icmp ne against 0 that is returned directly.
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length5_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length5_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length5_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 8 bytes returning the full three-way i32 result.
+; All three prefixes expect a single-block expansion: one i64 load per operand,
+; both passed through llvm.bswap.i64, then unsigned ugt/ult compares whose
+; zero-extended results are subtracted to form the return value.
+define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length8(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX1-LABEL: define i32 @length8(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX1-NEXT: ret i32 [[TMP9]]
+;
+; X64-AVX2-LABEL: define i32 @length8(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-AVX2-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+; memcmp of 8 bytes whose result is compared for equality with zero (icmp eq).
+; All three prefixes expect one i64 load per operand and an icmp ne on the
+; loaded values; the zext'ed result is then still compared with
+; `icmp eq ..., 0` and that value is returned.
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length8_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 8 bytes between %X and the start of the constant @.str, with the
+; result compared against zero (icmp ne).
+; All three prefixes expect the constant side to appear as the immediate
+; 3978425819141910832 in a single icmp ne against an i64 load of %X, and that
+; compare to be returned directly.
+; NOTE(review): @.str is defined outside this view.
+define i1 @length8_eq_const(ptr %X) nounwind !prof !14 {
+; X64-LABEL: define i1 @length8_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX1-LABEL: define i1 @length8_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length8_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 12 bytes whose result is only compared against zero (icmp ne).
+; All three prefixes expect a single-block expansion: an i64 load plus an i32
+; load (at offset 8, zero-extended to i64) per operand, each pair xor'ed, the
+; xors or'ed together, and one icmp ne against 0 that is returned directly.
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i1 @length12_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX1-LABEL: define i1 @length12_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP12]]
+;
+; X64-AVX2-LABEL: define i1 @length12_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; memcmp of 12 bytes returning the full three-way i32 result.
+; All three prefixes expect a multi-block expansion:
+;   loadbb    - i64 loads of both operands, bswap'ed and compared for equality;
+;               a mismatch branches to res_block, a match to loadbb1.
+;   loadbb1   - i32 loads at offset 8, bswap'ed, zero-extended to i64 and
+;               compared; equal goes to endblock, otherwise to res_block.
+;   res_block - phis pick the mismatching pair from loadbb or loadbb1 and
+;               select -1 or 1 from an unsigned less-than.
+;   endblock  - phi yielding 0 from loadbb1 or the res_block value.
+define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length12(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length12(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length12(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+; memcmp of 16 bytes returning the full three-way i32 result.
+; All three prefixes expect a multi-block expansion:
+;   loadbb    - i64 loads of both operands, bswap'ed and compared for equality;
+;               a mismatch branches to res_block, a match to loadbb1.
+;   loadbb1   - the same for the i64 at offset 8; equal goes to endblock,
+;               otherwise to res_block.
+;   res_block - phis pick the mismatching pair from loadbb or loadbb1 and
+;               select -1 or 1 from an unsigned less-than.
+;   endblock  - phi yielding 0 from loadbb1 or the res_block value.
+define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length16(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: br label [[LOADBB:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb:
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX1-LABEL: define i32 @length16(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX1: res_block:
+; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX1: loadbb:
+; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX1: loadbb1:
+; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX1: endblock:
+; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX1-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-AVX2-LABEL: define i32 @length16(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: br label [[LOADBB:%.*]]
+; X64-AVX2: res_block:
+; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]]
+; X64-AVX2: loadbb:
+; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-AVX2: loadbb1:
+; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-AVX2: endblock:
+; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-AVX2-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 {
+;
+; X64-AVX-LABEL: length16_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-LABEL: define i1 @length16_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP3]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind !prof !14 {
+;
+; X64-AVX-LABEL: length16_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-LABEL: define i1 @length16_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length16_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length16_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length24(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length24(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length24(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 {
+;
+; X64-AVX-LABEL: length24_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+; X64-LABEL: define i1 @length24_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind !prof !14 {
+;
+; X64-AVX-LABEL: length24_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+; X64-LABEL: define i1 @length24_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX1-LABEL: define i1 @length24_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP8]]
+;
+; X64-AVX2-LABEL: define i1 @length24_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
+; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP8]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length32(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length32(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length32(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 {
+;
+; X64-LABEL: define i1 @length32_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX1-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; X64-AVX2-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind !prof !14 {
+;
+; X64-LABEL: define i1 @length32_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-NEXT: ret i1 [[TMP7]]
+;
+; X64-AVX1-LABEL: define i1 @length32_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP2]]
+;
+; X64-AVX2-LABEL: define i1 @length32_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @length64(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: ret i32 [[M]]
+;
+; X64-AVX1-LABEL: define i32 @length64(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]]
+; X64-AVX1-NEXT: ret i32 [[M]]
+;
+; X64-AVX2-LABEL: define i32 @length64(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]]
+; X64-AVX2-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 {
+;
+; X64-LABEL: define i1 @length64_eq(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]]
+; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X64-NEXT: ret i1 [[CMP]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX1-NEXT: ret i1 [[TMP10]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32
+; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1
+; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1
+; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]]
+; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]]
+; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0
+; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64-AVX2-NEXT: ret i1 [[TMP10]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind !prof !14 {
+;
+; X64-LABEL: define i1 @length64_eq_const(
+; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR0]]
+; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X64-NEXT: ret i1 [[C]]
+;
+; X64-AVX1-LABEL: define i1 @length64_eq_const(
+; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX1-NEXT: ret i1 [[C]]
+;
+; X64-AVX2-LABEL: define i1 @length64_eq_const(
+; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392
+; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32
+; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1
+; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306
+; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]]
+; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0
+; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
+; X64-AVX2-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 {
+; X64-LABEL: define i32 @bcmp_length2(
+; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX1-LABEL: define i32 @bcmp_length2(
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX1-NEXT: ret i32 [[TMP4]]
+;
+; X64-AVX2-LABEL: define i32 @bcmp_length2(
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] {
+; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-AVX2-NEXT: ret i32 [[TMP4]]
+;
+ %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll
new file mode 100644
index 00000000000000..8c86c110c7bb2b
--- /dev/null
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll
@@ -0,0 +1,4813 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse < %s | FileCheck %s --check-prefix=X86-SSE1
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2
+; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=X86-SSE41
+
+; This tests the IR-level inline expansion/optimization of memcmp by the expand-memcmp pass
+; rdar://6480398
+
+ at .str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
+
+declare dso_local i32 @memcmp(ptr, ptr, i32)
+
+define i32 @length0(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length0(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-NEXT: ret i32 0
+;
+; X86-SSE1-LABEL: define i32 @length0(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE1-NEXT: ret i32 0
+;
+; X86-SSE2-LABEL: define i32 @length0(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE2-NEXT: ret i32 0
+;
+; X86-SSE41-LABEL: define i32 @length0(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; X86-SSE41-NEXT: ret i32 0
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
+ ret i32 %m
+ }
+
+define i1 @length0_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length0_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: ret i1 true
+;
+; X86-SSE1-LABEL: define i1 @length0_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: ret i1 true
+;
+; X86-SSE2-LABEL: define i1 @length0_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: ret i1 true
+;
+; X86-SSE41-LABEL: define i1 @length0_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: ret i1 true
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length0_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: ret i1 false
+;
+; X86-SSE1-LABEL: define i1 @length0_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: ret i1 false
+;
+; X86-SSE2-LABEL: define i1 @length0_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: ret i1 false
+;
+; X86-SSE41-LABEL: define i1 @length0_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: ret i1 false
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length2(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length2(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE1-LABEL: define i32 @length2(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i32 @length2(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: ret i32 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i32 @length2(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ ret i32 %m
+}
+
+define i32 @length2_const(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length2_const(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-NEXT: ret i32 [[TMP4]]
+;
+; X86-SSE1-LABEL: define i32 @length2_const(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-SSE1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-SSE1-NEXT: ret i32 [[TMP4]]
+;
+; X86-SSE2-LABEL: define i32 @length2_const(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-SSE2-NEXT: ret i32 [[TMP4]]
+;
+; X86-SSE41-LABEL: define i32 @length2_const(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-SSE41-NEXT: ret i32 [[TMP4]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_gt_const(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_gt_const(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-SSE1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_gt_const(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_gt_const(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; X86-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594
+; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length2_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE1-LABEL: define i1 @length2_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP2]]
+;
+; X86-SSE41-LABEL: define i1 @length2_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length3(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length3(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length3(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br label [[ENDBLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length3(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length3(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br label [[ENDBLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length3_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE1-LABEL: define i1 @length3_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length3_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE41-LABEL: define i1 @length3_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X86-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length4(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE1-LABEL: define i32 @length4(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE1-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE2-LABEL: define i32 @length4(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE2-NEXT: ret i32 [[TMP9]]
+;
+; X86-SSE41-LABEL: define i32 @length4(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X86-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X86-SSE41-NEXT: ret i32 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length4_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE1-LABEL: define i1 @length4_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE41-LABEL: define i1 @length4_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length4_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE1-LABEL: define i1 @length4_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE2-LABEL: define i1 @length4_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE41-LABEL: define i1 @length4_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length4_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE1-LABEL: define i1 @length4_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE1-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE2-LABEL: define i1 @length4_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE2-NEXT: ret i1 [[TMP5]]
+;
+; X86-SSE41-LABEL: define i1 @length4_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X86-SSE41-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length4_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length4_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length4_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length4_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length4_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length5(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length5(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length5(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br label [[ENDBLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length5(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length5(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br label [[ENDBLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length5_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE1-LABEL: define i1 @length5_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE2-LABEL: define i1 @length5_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP12]]
+;
+; X86-SSE41-LABEL: define i1 @length5_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X86-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP12]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length5_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br label [[ENDBLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length5_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br label [[ENDBLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length5_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br label [[ENDBLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length5_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br label [[ENDBLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length7(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length7(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length7(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length7(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length7(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
+ ret i32 %m
+}
+
+define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length7_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length7_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length7_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length7_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @length7_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-NEXT: ret i1 [[TMP10]]
+;
+; X86-SSE1-LABEL: define i1 @length7_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP10]]
+;
+; X86-SSE2-LABEL: define i1 @length7_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP10]]
+;
+; X86-SSE41-LABEL: define i1 @length7_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP10]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind { ; three-way memcmp of 8 bytes; the raw i32 result is returned
+; X86-LABEL: define i32 @length8(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: br label [[LOADBB:%.*]]
+; X86: res_block:
+; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-NEXT: br label [[ENDBLOCK:%.*]]
+; X86: loadbb:
+; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86: loadbb1:
+; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86: endblock:
+; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE1-LABEL: define i32 @length8(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE1: res_block:
+; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE1: loadbb:
+; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE1: loadbb1:
+; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE1: endblock:
+; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE1-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE2-LABEL: define i32 @length8(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE2: res_block:
+; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE2: loadbb:
+; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE2: loadbb1:
+; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE2: endblock:
+; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE2-NEXT: ret i32 [[PHI_RES]]
+;
+; X86-SSE41-LABEL: define i32 @length8(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: br label [[LOADBB:%.*]]
+; X86-SSE41: res_block:
+; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]]
+; X86-SSE41: loadbb:
+; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X86-SSE41: loadbb1:
+; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X86-SSE41: endblock:
+; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X86-SSE41-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind ; all four check groups above expect this call to become two 4-byte load + bswap compare blocks (offsets 0 and 4)
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind { ; 8-byte memcmp whose result is only tested for equality with zero
+; X86-LABEL: define i1 @length8_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length8_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length8_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind ; all four check groups expect a branch-free expansion: two 4-byte xors or'ed together and tested against zero (no bswap)
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind { ; 8-byte memcmp of %X against the global @.str, result tested against zero
+; X86-LABEL: define i1 @length8_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE1-LABEL: define i1 @length8_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE1-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE1-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE1-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE1-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE2-LABEL: define i1 @length8_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length8_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind ; expected expansion has no load from @.str: each 4-byte word of %X is xor'ed with an i32 immediate (presumably the folded bytes of @.str)
+ %c = icmp ne i32 %m, 0 ; note: despite the _eq suffix this test checks for inequality
+ ret i1 %c
+}
+
+define i1 @length9_eq(ptr %X, ptr %Y) nounwind { ; 9-byte memcmp tested for equality with zero
+; X86-LABEL: define i1 @length9_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length9_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length9_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length9_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length10_eq(ptr %X, ptr %Y) nounwind { ; 10-byte memcmp tested for equality with zero
+; X86-LABEL: define i1 @length10_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length10_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length10_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length10_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length11_eq(ptr %X, ptr %Y) nounwind { ; 11-byte memcmp tested for equality with zero
+; X86-LABEL: define i1 @length11_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length11_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length11_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length11_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind { ; 12-byte memcmp whose result is compared with zero
+; X86-LABEL: define i1 @length12_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length12_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length12_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length12_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp ne i32 %m, 0 ; note: despite the _eq suffix this test checks for inequality
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind { ; three-way memcmp of 12 bytes; the raw i32 result is returned
+; X86-LABEL: define i32 @length12(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length12(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length12(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length12(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind ; all four check groups expect this call to survive unexpanded
+ ret i32 %m
+}
+
+define i1 @length13_eq(ptr %X, ptr %Y) nounwind { ; 13-byte memcmp tested for equality with zero
+; X86-LABEL: define i1 @length13_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length13_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length13_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length13_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length14_eq(ptr %X, ptr %Y) nounwind { ; 14-byte memcmp tested for equality with zero
+; X86-LABEL: define i1 @length14_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length14_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length14_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length14_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15(ptr %X, ptr %Y) nounwind { ; three-way memcmp of 15 bytes; the raw i32 result is returned
+; X86-LABEL: define i32 @length15(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length15(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length15(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length15(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind ; all four check groups expect this call to survive unexpanded
+ ret i32 %m
+}
+
+define i1 @length15_lt(ptr %X, ptr %Y) nounwind { ; 15-byte memcmp whose result is tested for "less than" (signed < 0)
+; X86-LABEL: define i1 @length15_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length15_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length15_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length15_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15_const(ptr %X, ptr %Y) nounwind { ; three-way memcmp of 15 bytes against a constant operand; %Y is not used by the body
+; X86-LABEL: define i32 @length15_const(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length15_const(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length15_const(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length15_const(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind ; second operand is the address of byte 1 of @.str; all four check groups expect the call to survive unexpanded
+ ret i32 %m
+}
+
+define i1 @length15_eq(ptr %X, ptr %Y) nounwind { ; 15-byte memcmp tested for equality with zero
+; X86-LABEL: define i1 @length15_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length15_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length15_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length15_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind ; all four check groups expect this call to survive unexpanded
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { ; 15-byte memcmp against a constant operand, tested for "greater than" (signed > 0); %Y is not used by the body
+; X86-LABEL: define i1 @length15_gt_const(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length15_gt_const(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length15_gt_const(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length15_gt_const(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind ; second operand is the address of byte 1 of @.str; all four check groups expect the call to survive unexpanded
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(ptr %X, ptr %Y) nounwind { ; three-way memcmp of 16 bytes; the raw i32 result is returned
+; X86-LABEL: define i32 @length16(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length16(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length16(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length16(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind ; all four check groups expect this call to survive unexpanded
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind { ; 16-byte memcmp compared with zero. NOTE(review): the NOSSE assembly-style checks just below differ from the IR-style checks of the other groups; presumably leftovers from the llc version of this test - TODO confirm a RUN line still uses that prefix
+; X86-NOSSE-LABEL: length16_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $16
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length16_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length16_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP3]]
+;
+; X86-SSE41-LABEL: define i1 @length16_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind ; the plain and SSE1 groups keep this call; the SSE2 and SSE4.1 groups expect a single i128 load from each pointer compared with icmp ne
+ %cmp = icmp ne i32 %call, 0 ; note: despite the _eq suffix this test checks for inequality
+ ret i1 %cmp
+}
+
+define i1 @length16_lt(ptr %x, ptr %y) nounwind { ; 16-byte memcmp whose result is tested for "less than" (signed < 0)
+; X86-LABEL: define i1 @length16_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length16_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length16_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind ; all four check groups expect this call to survive unexpanded
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(ptr %x, ptr %y) nounwind { ; 16-byte memcmp whose result is tested for "greater than" (signed > 0)
+; X86-LABEL: define i1 @length16_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length16_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length16_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length16_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind ; all four check groups expect this call to survive unexpanded
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length16_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $16
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length16_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length16_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length16_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length16_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
+
+define i32 @length24(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length24(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length24(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length24(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length24(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length24_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $24
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length24_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length24_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length24_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length24_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length24_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length24_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length24_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length24_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length24_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length24_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length24_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $24
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length24_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length24_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length24_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length24_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length31(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length31(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length31(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length31(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length31(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind
+ ret i32 %m
+}
+
+define i1 @length31_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length31_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $31
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length31_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length31_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length31_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length31_eq_prefer128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $31
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length31_eq_prefer128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length31_eq_prefer128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length31_eq_prefer128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length31_eq_prefer128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length31_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $31
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length31_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length31_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length31_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length31_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length32(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length32(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length32(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length32(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind {
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length32_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length32_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length32_eq_prefer128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq_prefer128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length32_eq_prefer128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_prefer128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length32_eq_prefer128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind {
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-LABEL: define i1 @length32_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length32_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length32_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE2-NEXT: ret i1 [[TMP7]]
+;
+; X86-SSE41-LABEL: define i1 @length32_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; X86-SSE41-NEXT: ret i1 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length48(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length48(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length48(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length48(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length48(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length48_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length48_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length48_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; X86-LABEL: define i1 @length48_eq_prefer128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length48_eq_prefer128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length48_eq_prefer128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length48_eq_prefer128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length48_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length48_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length48_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length48_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length63(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length63(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length63(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length63(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length63(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind
+ ret i32 %m
+}
+
+define i1 @length63_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length63_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length63_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length63_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length63_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length63_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length63_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length63_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length63_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length63_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length63_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length63_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length63_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length63_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length63_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length63_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length63_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length64(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length64(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length64(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length64(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length64_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length64_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length64_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length64_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length64_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length64_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length64_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length64_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length64_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length64_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length64_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length64_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length64_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length64_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length96(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length96(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length96(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length96(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length96(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind
+ ret i32 %m
+}
+
+define i1 @length96_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length96_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length96_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length96_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length96_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length96_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length96_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length96_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length96_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length96_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length96_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length96_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length96_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length96_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length96_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length96_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length96_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length127(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length127(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length127(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length127(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length127(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind
+ ret i32 %m
+}
+
+define i1 @length127_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length127_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length127_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length127_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length127_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length127_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length127_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length127_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length127_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length127_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length127_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length127_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length127_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length127_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length127_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length127_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length127_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length128(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length128(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length128(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length128(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length128(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind
+ ret i32 %m
+}
+
+define i1 @length128_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length128_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length128_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length128_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length128_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length128_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length128_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length128_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length128_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length128_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length128_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length128_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length128_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length128_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length128_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length128_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length128_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length192(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length192(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length192(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length192(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length192_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length192_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length192_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length192_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length192_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length192_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length192_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length192_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length192_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length192_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length192_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length192_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length192_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length192_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length192_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length192_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length255(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length255(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length255(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length255(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length255_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length255_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length255_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length255_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length255_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length255_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length255_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length255_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length255_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length255_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length255_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length255_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length255_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length255_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length255_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length255_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length256(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length256(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length256(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length256(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length256_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length256_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length256_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length256_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length256_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length256_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length256_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length256_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length256_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length256_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length256_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length256_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length256_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length256_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length256_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length256_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length384(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length384(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length384(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length384(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length384(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind
+ ret i32 %m
+}
+
+define i1 @length384_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length384_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length384_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length384_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length384_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length384_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length384_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length384_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length384_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length384_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length384_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length384_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length384_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length384_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length384_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length384_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length384_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length511(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length511(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length511(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length511(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length511(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind
+ ret i32 %m
+}
+
+define i1 @length511_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length511_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length511_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length511_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length511_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length511_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length511_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length511_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length511_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length511_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length511_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length511_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length511_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length511_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length511_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length511_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length511_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length512(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @length512(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @length512(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @length512(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @length512(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind
+ ret i32 %m
+}
+
+define i1 @length512_eq(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length512_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length512_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length512_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length512_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_lt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length512_lt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length512_lt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length512_lt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length512_lt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_gt(ptr %x, ptr %y) nounwind {
+; X86-LABEL: define i1 @length512_gt(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE1-LABEL: define i1 @length512_gt(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE1-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE2-LABEL: define i1 @length512_gt(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE2-NEXT: ret i1 [[CMP]]
+;
+; X86-SSE41-LABEL: define i1 @length512_gt(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; X86-SSE41-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_eq_const(ptr %X) nounwind {
+; X86-LABEL: define i1 @length512_eq_const(
+; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @length512_eq_const(
+; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @length512_eq_const(
+; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @length512_eq_const(
+; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks that we do not do stupid things with huge sizes.
+define i32 @huge_length(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i32 @huge_length(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @huge_length(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @huge_length(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @huge_length(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
+ ret i32 %m
+}
+
+define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind {
+; X86-LABEL: define i1 @huge_length_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @huge_length_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @huge_length_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @huge_length_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks non-constant sizes.
+define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind {
+; X86-LABEL: define i32 @nonconst_length(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-NEXT: ret i32 [[M]]
+;
+; X86-SSE1-LABEL: define i32 @nonconst_length(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE1-NEXT: ret i32 [[M]]
+;
+; X86-SSE2-LABEL: define i32 @nonconst_length(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE2-NEXT: ret i32 [[M]]
+;
+; X86-SSE41-LABEL: define i32 @nonconst_length(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE41-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
+ ret i32 %m
+}
+
+define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind {
+; X86-LABEL: define i1 @nonconst_length_eq(
+; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-NEXT: ret i1 [[C]]
+;
+; X86-SSE1-LABEL: define i1 @nonconst_length_eq(
+; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE1-NEXT: ret i1 [[C]]
+;
+; X86-SSE2-LABEL: define i1 @nonconst_length_eq(
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE2-NEXT: ret i1 [[C]]
+;
+; X86-SSE41-LABEL: define i1 @nonconst_length_eq(
+; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] {
+; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]]
+; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; X86-SSE41-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
index d71ae8be19b668..5a0f4db363536d 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
@@ -1,64 +1,66 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
declare i32 @memcmp(ptr nocapture, ptr nocapture, i32)
define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: ret i32 [[TMP9]]
+; X32-LABEL: define i32 @cmp2(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2)
ret i32 %call
}
define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) {
-; X32-LABEL: @cmp2_align2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: ret i32 [[TMP9]]
+; X32-LABEL: define i32 @cmp2_align2(
+; X32-SAME: ptr nocapture readonly align 2 [[X:%.*]], ptr nocapture readonly align 2 [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 2
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 2
+; X32-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2)
ret i32 %call
}
define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp3(
+; X32-LABEL: define i32 @cmp3(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
+; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X32-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X32-NEXT: br label [[ENDBLOCK]]
; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 3)
@@ -66,47 +68,49 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp4(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
-; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X32-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X32-NEXT: ret i32 [[TMP11]]
+; X32-LABEL: define i32 @cmp4(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X32-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X32-NEXT: ret i32 [[TMP9]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 4)
ret i32 %call
}
define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp5(
+; X32-LABEL: define i32 @cmp5(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]]
+; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X32-NEXT: br label [[ENDBLOCK]]
; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 5)
@@ -114,32 +118,33 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp6(
+; X32-LABEL: define i32 @cmp6(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X32-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
-; X32-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32
-; X32-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
-; X32-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]])
+; X32-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]])
+; X32-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i32
+; X32-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i32
+; X32-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]]
+; X32-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
@@ -149,30 +154,31 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp7(
+; X32-LABEL: define i32 @cmp7(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
-; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
@@ -182,30 +188,31 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp8(
+; X32-LABEL: define i32 @cmp8(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
-; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
@@ -215,8 +222,9 @@ define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp9(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 9)
+; X32-LABEL: define i32 @cmp9(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 9)
@@ -224,8 +232,9 @@ define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp10(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 10)
+; X32-LABEL: define i32 @cmp10(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 10)
@@ -233,8 +242,9 @@ define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp11(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 11)
+; X32-LABEL: define i32 @cmp11(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 11)
@@ -242,8 +252,9 @@ define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp12(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 12)
+; X32-LABEL: define i32 @cmp12(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 12)
@@ -251,8 +262,9 @@ define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp13(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 13)
+; X32-LABEL: define i32 @cmp13(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 13)
@@ -260,8 +272,9 @@ define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp14(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 14)
+; X32-LABEL: define i32 @cmp14(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 14)
@@ -269,8 +282,9 @@ define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp15(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 15)
+; X32-LABEL: define i32 @cmp15(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 15)
@@ -278,8 +292,9 @@ define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp16(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 16)
+; X32-LABEL: define i32 @cmp16(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16)
; X32-NEXT: ret i32 [[CALL]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16)
@@ -287,12 +302,13 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X32-LABEL: define i32 @cmp_eq2(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -303,21 +319,22 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq3(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
-; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
-; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-LABEL: define i32 @cmp_eq3(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X32-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -328,12 +345,13 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq4(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X32-LABEL: define i32 @cmp_eq4(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -344,21 +362,22 @@ define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq5(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
-; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-LABEL: define i32 @cmp_eq5(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -369,21 +388,22 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq6(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X32-LABEL: define i32 @cmp_eq6(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -394,21 +414,22 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) {
-; X32-LABEL: @cmp_eq6_align4(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4
-; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4
-; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X32-LABEL: define i32 @cmp_eq6_align4(
+; X32-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4
+; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4
+; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -419,19 +440,20 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read
}
define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq7(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-LABEL: define i32 @cmp_eq7(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -442,19 +464,20 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq8(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-LABEL: define i32 @cmp_eq8(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -465,8 +488,9 @@ define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq9(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 9)
+; X32-LABEL: define i32 @cmp_eq9(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -478,8 +502,9 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq10(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 10)
+; X32-LABEL: define i32 @cmp_eq10(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -491,8 +516,9 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq11(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 11)
+; X32-LABEL: define i32 @cmp_eq11(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -504,8 +530,9 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq12(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 12)
+; X32-LABEL: define i32 @cmp_eq12(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -517,8 +544,9 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq13(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 13)
+; X32-LABEL: define i32 @cmp_eq13(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -530,8 +558,9 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq14(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 14)
+; X32-LABEL: define i32 @cmp_eq14(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -543,8 +572,9 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq15(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 15)
+; X32-LABEL: define i32 @cmp_eq15(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
@@ -556,8 +586,9 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X32-LABEL: @cmp_eq16(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 16)
+; X32-LABEL: define i32 @cmp_eq16(
+; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16)
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
index f686e29975564f..99100aad3ee84a 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -1,66 +1,67 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
-; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD
declare i32 @memcmp(ptr nocapture, ptr nocapture, i64)
define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp2(
-; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: ret i32 [[TMP9]]
+; X64-LABEL: define i32 @cmp2(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2)
ret i32 %call
}
define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) {
-; X64-LABEL: @cmp2_align2(
-; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2
-; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2
-; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: ret i32 [[TMP9]]
+; X64-LABEL: define i32 @cmp2_align2(
+; X64-SAME: ptr nocapture readonly align 2 [[X:%.*]], ptr nocapture readonly align 2 [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 2
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 2
+; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2)
ret i32 %call
}
define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp3(
+; X64-LABEL: define i32 @cmp3(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3)
@@ -68,47 +69,49 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp4(
-; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X64-NEXT: ret i32 [[TMP11]]
+; X64-LABEL: define i32 @cmp4(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4)
ret i32 %call
}
define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp5(
+; X64-LABEL: define i32 @cmp5(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5)
@@ -116,32 +119,33 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp6(
+; X64-LABEL: define i32 @cmp6(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
-; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
-; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i32
+; X64-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i32
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -151,30 +155,31 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp7(
+; X64-LABEL: define i32 @cmp7(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -184,47 +189,49 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp8(
-; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X64-NEXT: ret i32 [[TMP11]]
+; X64-LABEL: define i32 @cmp8(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8)
ret i32 %call
}
define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp9(
+; X64-LABEL: define i32 @cmp9(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP7:%.*]], [[TMP8:%.*]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9)
@@ -232,32 +239,33 @@ define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp10(
+; X64-LABEL: define i32 @cmp10(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
-; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
-; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i64
+; X64-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i64
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -267,30 +275,31 @@ define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp11(
+; X64-LABEL: define i32 @cmp11(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -300,32 +309,33 @@ define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp12(
+; X64-LABEL: define i32 @cmp12(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
-; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
-; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -335,30 +345,31 @@ define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp13(
+; X64-LABEL: define i32 @cmp13(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -368,30 +379,31 @@ define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp14(
+; X64-LABEL: define i32 @cmp14(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -401,30 +413,31 @@ define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp15(
+; X64-LABEL: define i32 @cmp15(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -434,30 +447,31 @@ define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp16(
+; X64-LABEL: define i32 @cmp16(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -467,12 +481,13 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp_eq2(
-; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-LABEL: define i32 @cmp_eq2(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
@@ -483,43 +498,45 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq3(
+; X64_1LD-LABEL: define i32 @cmp_eq3(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq3(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq3(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -530,12 +547,13 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp_eq4(
-; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-LABEL: define i32 @cmp_eq4(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
@@ -546,43 +564,45 @@ define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq5(
+; X64_1LD-LABEL: define i32 @cmp_eq5(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq5(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq5(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -593,43 +613,45 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq6(
+; X64_1LD-LABEL: define i32 @cmp_eq6(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq6(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq6(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -640,43 +662,45 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) {
-; X64_1LD-LABEL: @cmp_eq6_align4(
+; X64_1LD-LABEL: define i32 @cmp_eq6_align4(
+; X64_1LD-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq6_align4(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq6_align4(
+; X64_2LD-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -687,41 +711,43 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read
}
define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq7(
+; X64_1LD-LABEL: define i32 @cmp_eq7(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq7(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq7(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -732,12 +758,13 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp_eq8(
-; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-LABEL: define i32 @cmp_eq8(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
@@ -748,43 +775,45 @@ define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq9(
+; X64_1LD-LABEL: define i32 @cmp_eq9(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq9(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq9(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -795,43 +824,45 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq10(
+; X64_1LD-LABEL: define i32 @cmp_eq10(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq10(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq10(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -842,41 +873,43 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq11(
+; X64_1LD-LABEL: define i32 @cmp_eq11(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq11(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq11(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -887,43 +920,45 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq12(
+; X64_1LD-LABEL: define i32 @cmp_eq12(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq12(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq12(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -934,41 +969,43 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq13(
+; X64_1LD-LABEL: define i32 @cmp_eq13(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq13(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq13(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -979,41 +1016,43 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq14(
+; X64_1LD-LABEL: define i32 @cmp_eq14(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq14(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq14(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -1024,41 +1063,43 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64_1LD-LABEL: @cmp_eq15(
+; X64_1LD-LABEL: define i32 @cmp_eq15(
+; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
-; X64_2LD-LABEL: @cmp_eq15(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-LABEL: define i32 @cmp_eq15(
+; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -1069,12 +1110,13 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
}
define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
-; X64-LABEL: @cmp_eq16(
-; X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-LABEL: define i32 @cmp_eq16(
+; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) {
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
diff --git a/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg b/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg
new file mode 100644
index 00000000000000..dfb347e640e144
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
\ No newline at end of file
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll
new file mode 100644
index 00000000000000..a62b17de08ee43
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -O2 -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+@s1 = internal global ptr @.str, align 8
+@s2 = internal global ptr @.str.1, align 8
+@s3 = internal global ptr @.str.2, align 8
+@.str = private unnamed_addr constant [9 x i8] c"01234000\00", align 1
+@.str.1 = private unnamed_addr constant [9 x i8] c"0123!000\00", align 1
+@.str.2 = private unnamed_addr constant [9 x i8] c"0123?000\00", align 1
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @memcmp_same_prefix_consts(ptr noundef %x) #0 {
+; CHECK-LABEL: define dso_local noundef i32 @memcmp_same_prefix_consts(
+; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP0]], 858927408
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 52
+; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END8:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP7:%.*]] = xor i32 [[TMP4]], 33
+; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP1]]
+; CHECK-NEXT: [[DOTNOT3:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[DOTNOT3]], label [[IF_END8]], label [[IF_THEN3:%.*]]
+; CHECK: if.then3:
+; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP4]], 63
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP1]]
+; CHECK-NEXT: [[DOTNOT4:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[DOTNOT4]], label [[IF_END8]], label [[RETURN:%.*]]
+; CHECK: if.end8:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_END8]] ], [ 42, [[IF_THEN3]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca ptr, align 8
+ store ptr %x, ptr %x.addr, align 8
+ %0 = load ptr, ptr %x.addr, align 8
+ %1 = load ptr, ptr @s1, align 8
+ %call = call i32 @memcmp(ptr noundef %0, ptr noundef %1, i64 noundef 5) #2
+ %cmp = icmp ne i32 %call, 0
+ br i1 %cmp, label %if.then, label %if.end8
+
+if.then: ; preds = %entry
+ %2 = load ptr, ptr %x.addr, align 8
+ %3 = load ptr, ptr @s2, align 8
+ %call1 = call i32 @memcmp(ptr noundef %2, ptr noundef %3, i64 noundef 5) #2
+ %cmp2 = icmp ne i32 %call1, 0
+ br i1 %cmp2, label %if.then3, label %if.end7
+
+if.then3: ; preds = %if.then
+ %4 = load ptr, ptr %x.addr, align 8
+ %5 = load ptr, ptr @s3, align 8
+ %call4 = call i32 @memcmp(ptr noundef %4, ptr noundef %5, i64 noundef 5) #2
+ %cmp5 = icmp ne i32 %call4, 0
+ br i1 %cmp5, label %if.then6, label %if.end
+
+if.then6: ; preds = %if.then3
+ store i32 42, ptr %retval, align 4
+ br label %return
+
+if.end: ; preds = %if.then3
+ br label %if.end7
+
+if.end7: ; preds = %if.end, %if.then
+ br label %if.end8
+
+if.end8: ; preds = %if.end7, %entry
+ store i32 0, ptr %retval, align 4
+ br label %return
+
+return: ; preds = %if.end8, %if.then6
+ %6 = load i32, ptr %retval, align 4
+ ret i32 %6
+}
+
+; Function Attrs: nounwind willreturn memory(read)
+declare i32 @memcmp(ptr noundef, ptr noundef, i64 noundef) #1
+
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll
new file mode 100644
index 00000000000000..2de1f8576f631f
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S --passes=mergeicmps,expand-memcmp -mtriple=i686-unknown-linux < %s | FileCheck %s --check-prefix=X86
+; RUN: opt -S --passes=mergeicmps,expand-memcmp -mtriple=x86_64-unknown-linux < %s | FileCheck %s --check-prefix=X64
+
+; This tests interaction between MergeICmp and ExpandMemCmp.
+
+%"struct.std::pair" = type { i32, i32 }
+
+define zeroext i1 @opeq1(
+; X86-LABEL: define zeroext i1 @opeq1(
+; X86-SAME: ptr nocapture readonly dereferenceable(8) [[A:%.*]], ptr nocapture readonly dereferenceable(8) [[B:%.*]]) local_unnamed_addr {
+; X86-NEXT: "entry+land.rhs.i":
+; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 1
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 1
+; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP0]], [[TMP1]]
+; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 1
+; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X86-NEXT: [[TMP7:%.*]] = xor i32 [[TMP5]], [[TMP6]]
+; X86-NEXT: [[TMP8:%.*]] = or i32 [[TMP2]], [[TMP7]]
+; X86-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; X86-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32
+; X86-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
+; X86: opeq1.exit:
+; X86-NEXT: ret i1 [[TMP11]]
+;
+; X64-LABEL: define zeroext i1 @opeq1(
+; X64-SAME: ptr nocapture readonly dereferenceable(8) [[A:%.*]], ptr nocapture readonly dereferenceable(8) [[B:%.*]]) local_unnamed_addr {
+; X64-NEXT: "entry+land.rhs.i":
+; X64-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 1
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[B]], align 1
+; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], [[TMP1]]
+; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; X64-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; X64-NEXT: br label [[OPEQ1_EXIT:%.*]]
+; X64: opeq1.exit:
+; X64-NEXT: ret i1 [[TMP4]]
+;
+ %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+ %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:
+ %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+ %0 = load i32, i32* %first.i, align 4
+ %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+ %1 = load i32, i32* %first1.i, align 4
+ %cmp.i = icmp eq i32 %0, %1
+ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+ %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+ %2 = load i32, i32* %second.i, align 4
+ %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+ %3 = load i32, i32* %second2.i, align 4
+ %cmp3.i = icmp eq i32 %2, %3
+ br label %opeq1.exit
+
+opeq1.exit:
+ %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+ ret i1 %4
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
new file mode 100644
index 00000000000000..68dfacac5b5e12
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
@@ -0,0 +1,856 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -O2 -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+
+declare i32 @memcmp(ptr, ptr, i64)
+
+declare i32 @bcmp(ptr, ptr, i64)
+
+; Function Attrs: nounwind
+define i32 @length0(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define noundef i32 @length0(
+; CHECK-SAME: ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: ret i32 0
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length0_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define noundef i1 @length0_eq(
+; CHECK-SAME: ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: ret i1 true
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length0_lt(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define noundef i1 @length0_lt(
+; CHECK-SAME: ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: ret i1 false
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) #0
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length2(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length2(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length2_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length2_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length2_lt(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length2_lt(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[C:%.*]] = icmp ult i16 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length2_gt(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length2_gt(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i16 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length2_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length2_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #1
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length3(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length3(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: loadbb:
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i16 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub nsw i32 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: br label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length3_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length3_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i16
+; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP10]], 0
+; CHECK-NEXT: ret i1 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length4(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length4(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[DOTNEG:%.*]] = sext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[DOTNEG]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP8]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length4_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length4_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length4_lt(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length4_lt(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length4_gt(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length4_gt(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length4_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length4_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 875770417
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length5(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length5(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: loadbb:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub nsw i32 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: br label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length5_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length5_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
+; CHECK-NEXT: ret i1 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length5_lt(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length5_lt(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: loadbb:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i8 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br label [[ENDBLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i1 [ [[TMP10]], [[LOADBB1]] ], [ [[TMP5]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i1 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length7_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length7_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i1 [[TMP9]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length8(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length8(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[DOTNEG:%.*]] = sext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[DOTNEG]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP8]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length8_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length8_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length8_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length8_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length9_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length9_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP10]], 0
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length10_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length10_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i16 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP10]], 0
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length11_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length11_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i1 [[DOTNOT2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length12_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length12_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT: ret i1 [[TMP11]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length12(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length12(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: loadbb:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP2]], [[LOADBB:%.*]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP3]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP9]])
+; CHECK-NEXT: [[TMP12:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; CHECK-NEXT: [[TMP13]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length13_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length13_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i1 [[DOTNOT2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length14_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length14_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i1 [[DOTNOT2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @length15_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @length15_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i1 [[DOTNOT2]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length16(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length16(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: loadbb:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP2]], [[LOADBB:%.*]] ], [ [[TMP11:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP3]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
+; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
+; CHECK: loadbb1:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11]] = tail call i64 @llvm.bswap.i64(i64 [[TMP9]])
+; CHECK-NEXT: [[TMP12]] = tail call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK: endblock:
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PHI_RES]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length16_eq(ptr %x, ptr %y) #0 {
+; CHECK-LABEL: define i1 @length16_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) #0
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; Function Attrs: nounwind
+define i1 @length16_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length16_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i128 [[TMP1]], 70720121592765328381466889075544961328
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length24(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length24(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(24) [[X]], ptr noundef nonnull dereferenceable(24) [[Y]], i64 24) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length24_eq(ptr %x, ptr %y) #0 {
+; CHECK-LABEL: define i1 @length24_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i64 [[TMP8]] to i128
+; CHECK-NEXT: [[TMP10:%.*]] = or i128 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i128 [[TMP10]], 0
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) #0
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; Function Attrs: nounwind
+define i1 @length24_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length24_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3689065127958034230
+; CHECK-NEXT: [[TMP6:%.*]] = zext i64 [[TMP5]] to i128
+; CHECK-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: ret i1 [[TMP8]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length32(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length32(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(32) [[X]], ptr noundef nonnull dereferenceable(32) [[Y]], i64 32) #[[ATTR5]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length32_eq(ptr %x, ptr %y) #0 {
+; CHECK-LABEL: define i1 @length32_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i128 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i128 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i1 [[DOTNOT2]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) #0
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; Function Attrs: nounwind
+define i1 @length32_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length32_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], 65382562593882267225249597816672106294
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) #0
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @length64(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @length64(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR5]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @length64_eq(ptr %x, ptr %y) #0 {
+; CHECK-LABEL: define i1 @length64_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR5]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) #0
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+; Function Attrs: nounwind
+define i1 @length64_eq_const(ptr %X) #0 {
+; CHECK-LABEL: define i1 @length64_eq_const(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) @.str, i64 64) #[[ATTR5]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @huge_length(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i32 @huge_length(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @huge_length_eq(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @huge_length_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) #0 {
+; CHECK-LABEL: define i32 @nonconst_length(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) #0
+ ret i32 %m
+}
+
+; Function Attrs: nounwind
+define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) #0 {
+; CHECK-LABEL: define i1 @nonconst_length_eq(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; Function Attrs: nounwind
+define i1 @bcmp_length2(ptr %X, ptr %Y) #0 {
+; CHECK-LABEL: define i1 @bcmp_length2(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[DOTNOT]]
+;
+ %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) #0
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nobuiltin nounwind }
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index b6068513d23063..3e3a40f217e63e 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -422,7 +422,6 @@ int main(int argc, char **argv) {
// supported.
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
- initializeExpandMemCmpLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSelectOptimizePass(Registry);
initializeCallBrPreparePass(Registry);
diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index 047f6583ec4e88..d618bd5bfab9aa 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -71,7 +71,6 @@ static_library("CodeGen") {
"ExecutionDomainFix.cpp",
"ExpandLargeDivRem.cpp",
"ExpandLargeFpConvert.cpp",
- "ExpandMemCmp.cpp",
"ExpandPostRAPseudos.cpp",
"ExpandReductions.cpp",
"ExpandVectorPredication.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
index bed26df94e2c45..876f5fece1128e 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
@@ -23,6 +23,7 @@ static_library("Scalar") {
"DeadStoreElimination.cpp",
"DivRemPairs.cpp",
"EarlyCSE.cpp",
+ "ExpandMemCmp.cpp",
"FlattenCFGPass.cpp",
"Float2Int.cpp",
"GVN.cpp",
>From 0907c0514d957832263ee6765d3d0b17ceae2454 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Mon, 8 Jan 2024 19:17:16 -0300
Subject: [PATCH 02/11] Whitespace fixes
---
.../include/llvm/CodeGen/CodeGenPassBuilder.h | 1 -
llvm/include/llvm/CodeGen/Passes.h | 1 -
llvm/include/llvm/InitializePasses.h | 2 +-
llvm/include/llvm/LinkAllPasses.h | 2 +-
llvm/lib/CodeGen/TargetPassConfig.cpp | 1 -
llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp | 38 +++++++++----------
6 files changed, 19 insertions(+), 26 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index 556304231b397b..d132837b439fb3 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -628,7 +628,6 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
-
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
addPass(GCLoweringPass());
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index e5ed5f15f62ed7..f8fae1b91314bc 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -519,7 +519,6 @@ namespace llvm {
// Expands large div/rem instructions.
FunctionPass *createExpandLargeFpConvertPass();
-
/// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp
FunctionPass *createBreakFalseDeps();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index b0ca9fa942cda3..b59ddf71743383 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -102,7 +102,7 @@ void initializeEarlyTailDuplicatePass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
-void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
+void initializeExpandLargeDivRemLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 9aff428fbe938b..e414738dd325e8 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -118,7 +118,7 @@ namespace {
(void) llvm::createGVNPass();
(void) llvm::createPostDomTree();
(void) llvm::createMergeICmpsLegacyPass();
- (void) llvm::createExpandLargeDivRemPass();
+ (void)llvm::createExpandLargeDivRemPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
llvm::raw_string_ostream os(buf);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 33562e90e94426..ac65bbab05a4b4 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -867,7 +867,6 @@ void TargetPassConfig::addIRPasses() {
addPass(createPrintFunctionPass(dbgs(),
"\n\n*** Code after LSR ***\n"));
}
-
}
// Run GC lowering passes for builtin collectors
diff --git a/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index 973875ee142978..d2124212526adf 100644
--- a/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -36,7 +36,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-
#define DEBUG_TYPE "expand-memcmp"
STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
@@ -60,7 +59,6 @@ static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
namespace {
-
// This class provides helper functions to expand a memcmp library call into an
// inline expansion.
class MemCmpExpansion {
@@ -90,8 +88,7 @@ class MemCmpExpansion {
// 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
struct LoadEntry {
LoadEntry(unsigned LoadSize, uint64_t Offset)
- : LoadSize(LoadSize), Offset(Offset) {
- }
+ : LoadSize(LoadSize), Offset(Offset) {}
// The size of the load for this block, in bytes.
unsigned LoadSize;
@@ -724,7 +721,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
// calculate which source was larger. The calculation requires the
// two loaded source values of each load compare block.
// These will be saved in the phi nodes created by setupResultBlockPHINodes.
- if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
+ if (!IsUsedForZeroCmp)
+ setupResultBlockPHINodes();
// Create the number of required load compare basic blocks.
createLoadCmpBlocks();
@@ -845,16 +843,15 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
const uint64_t SizeVal = SizeCast->getZExtValue();
-
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp =
IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
bool OptForSize = CI->getFunction()->hasOptSize() ||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
- auto Options = TTI->enableMemCmpExpansion(OptForSize,
- IsUsedForZeroCmp);
- if (!Options) return false;
+ auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp);
+ if (!Options)
+ return false;
Value *Res = nullptr;
if (SizeVal == 0) {
@@ -863,8 +860,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (OptForSize &&
- MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
@@ -892,20 +888,19 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- const DataLayout &DL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
+ const TargetTransformInfo *TTI, const DataLayout &DL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU);
static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT);
-
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- const DataLayout &DL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
+ const TargetTransformInfo *TTI, const DataLayout &DL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU) {
for (Instruction &I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
@@ -922,13 +917,14 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, DominatorTree *DT) {
+ const TargetTransformInfo *TTI,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ DominatorTree *DT) {
std::optional<DomTreeUpdater> DTU;
if (DT)
DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- const DataLayout& DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
if (runOnBlock(*BBIt, TLI, TTI, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
>From 77d7e14f3f158c5d9a0724ebebcbcd1edbda4730 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Tue, 9 Jan 2024 17:24:36 -0300
Subject: [PATCH 03/11] Apply suggestions from code review and skip memcmp
expansion when sanitizers are on
---
llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp | 8 ++
.../test/CodeGen/AArch64/dag-combine-setcc.ll | 50 +++++-----
.../AArch64/machine-licm-hoist-load.ll | 94 +++++++++----------
3 files changed, 80 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index d2124212526adf..ed3843de422f00 100644
--- a/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -920,6 +920,14 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
DominatorTree *DT) {
+ // Sanitizers prefer that calls to memcmp remain as such
+ // so that they may be intercepted, but since the sanitizer passes run late
+ // we disable the optimization here. See maybeMarkSanitizerLibraryCallNoBuiltin
+ if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeThread))
+ return PreservedAnalyses::all();
std::optional<DomTreeUpdater> DTU;
if (DT)
DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
index 855a5b23f6c1cc..f58de2efb1c310 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -266,19 +266,19 @@ define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {
; CHECK-NEXT: ccmp x10, x11, #0, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
- %1 = load i64, ptr %a, align 1
- %2 = load i64, ptr %b, align 1
- %3 = xor i64 %1, %2
- %4 = getelementptr i8, ptr %a, i64 8
- %5 = getelementptr i8, ptr %b, i64 8
- %6 = load i64, ptr %4, align 1
- %7 = load i64, ptr %5, align 1
- %8 = xor i64 %6, %7
- %9 = or i64 %3, %8
- %10 = icmp ne i64 %9, 0
- %11 = zext i1 %10 to i32
- %cmp = icmp eq i32 %11, 0
- ret i1 %cmp
+ %a.0 = load i64, ptr %a, align 1
+ %b.0 = load i64, ptr %b, align 1
+ %xor1 = xor i64 %a.0, %b.0
+ %1 = getelementptr i8, ptr %a, i64 8
+ %2 = getelementptr i8, ptr %b, i64 8
+ %a.8 = load i64, ptr %1, align 1
+ %b.8 = load i64, ptr %2, align 1
+ %xor2 = xor i64 %a.8, %b.8
+ %or = or i64 %xor1, %xor2
+ %cmp1 = icmp ne i64 %or, 0
+ %ext = zext i1 %cmp to i32
+ %cmp2 = icmp eq i32 %ext, 0
+ ret i1 %cmp2
}
define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) {
@@ -290,18 +290,18 @@ define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) {
; CHECK-NEXT: ccmp x10, x11, #0, eq
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
- %1 = load i64, ptr %a, align 1
- %2 = load i64, ptr %b, align 1
- %3 = xor i64 %1, %2
- %4 = getelementptr i8, ptr %a, i64 8
- %5 = getelementptr i8, ptr %b, i64 8
- %6 = load i64, ptr %4, align 1
- %7 = load i64, ptr %5, align 1
- %8 = xor i64 %6, %7
- %9 = or i64 %3, %8
- %10 = icmp ne i64 %9, 0
- %11 = zext i1 %10 to i32
- ret i1 %10
+ %a.0 = load i64, ptr %a, align 1
+ %b.0 = load i64, ptr %b, align 1
+ %xor1 = xor i64 %a.0, %b.0
+ %1 = getelementptr i8, ptr %a, i64 8
+ %2 = getelementptr i8, ptr %b, i64 8
+ %a.8 = load i64, ptr %1, align 1
+ %b.8 = load i64, ptr %2, align 1
+ %xor2 = xor i64 %a.8, %b.8
+ %or = or i64 %xor1, %xor2
+ %cmp = icmp ne i64 %or, 0
+ %ext = zext i1 %cmp to i32
+ ret i1 %ext
}
; Doesn't increase the number of instructions, where the LHS has multiple uses
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index fc0bc1b9661163..0651027b7b9b5f 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -30,11 +30,11 @@ for.body: ; preds = %for.body, %entry
%sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
%0 = load ptr, ptr %arrayidx, align 8
- %1 = load i32, ptr %0, align 1
- %2 = load i32, ptr %b, align 1
- %3 = icmp ne i32 %1, %2
- %4 = zext i1 %3 to i32
- %tobool = icmp eq i32 %4, 0
+ %bcmp_exp = load i32, ptr %0, align 1
+ %bcmp_exp2 = load i32, ptr %b, align 1
+ %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2
+ %res = zext i1 %cmp to i32
+ %tobool = icmp eq i32 %res, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.05, %add
%inc = add nuw i64 %i.06, 1
@@ -94,11 +94,11 @@ for.body4: ; preds = %for.body4, %for.con
%sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ]
%arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016
%1 = load ptr, ptr %arrayidx5, align 8
- %2 = load i32, ptr %1, align 1
- %3 = load i32, ptr %b, align 1
- %4 = icmp ne i32 %2, %3
- %5 = zext i1 %4 to i32
- %tobool = icmp eq i32 %5, 0
+ %bcmp_exp = load i32, ptr %1, align 1
+ %bcmp_exp2 = load i32, ptr %b, align 1
+ %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2
+ %res = zext i1 %cmp to i32
+ %tobool = icmp eq i32 %res, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.115, %add
%inc = add nuw i64 %j.016, 1
@@ -184,11 +184,11 @@ for.body8: ; preds = %for.body8, %for.con
%sum.225 = phi i64 [ %sum.128, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
%arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.026
%2 = load ptr, ptr %arrayidx10, align 8
- %3 = load i32, ptr %2, align 1
- %4 = load i32, ptr %b, align 1
- %5 = icmp ne i32 %3, %4
- %6 = zext i1 %5 to i32
- %tobool = icmp eq i32 %6, 0
+ %bcmp_exp = load i32, ptr %2, align 1
+ %bcmp_exp2 = load i32, ptr %b, align 1
+ %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2
+ %res = zext i1 %cmp to i32
+ %tobool = icmp eq i32 %res, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.225, %add
%inc = add nuw i64 %k.026, 1
@@ -284,11 +284,11 @@ for.body8: ; preds = %for.body8, %for.con
%sum.227 = phi i64 [ %sum.130, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
%arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.028
%3 = load ptr, ptr %arrayidx10, align 8
- %4 = load i32, ptr %3, align 1
- %5 = load i32, ptr %2, align 1
- %6 = icmp ne i32 %4, %5
- %7 = zext i1 %6 to i32
- %tobool = icmp eq i32 %7, 0
+ %bcmp_exp = load i32, ptr %3, align 1
+ %bcmp_exp2 = load i32, ptr %2, align 1
+ %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2
+ %res = zext i1 %cmp to i32
+ %tobool = icmp eq i32 %res, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.227, %add
%inc = add nuw i64 %k.028, 1
@@ -344,16 +344,16 @@ for.body: ; preds = %for.body, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
%0 = load ptr, ptr %arrayidx, align 8
- %1 = load i32, ptr %0, align 1
- %2 = load i32, ptr %b, align 1
- %3 = call i32 @llvm.bswap.i32(i32 %1)
- %4 = call i32 @llvm.bswap.i32(i32 %2)
- %5 = icmp ugt i32 %3, %4
- %6 = icmp ult i32 %3, %4
- %7 = zext i1 %5 to i32
- %8 = zext i1 %6 to i32
- %9 = sub i32 %7, %8
- %conv = trunc i32 %9 to i8
+ %memcmp_exp = load i32, ptr %0, align 1
+ %memcmp_exp2 = load i32, ptr %b, align 1
+ %swap = call i32 @llvm.bswap.i32(i32 %memcmp_exp)
+ %swap2 = call i32 @llvm.bswap.i32(i32 %memcmp_exp2)
+ %cmp1 = icmp ugt i32 %swap, %swap2
+ %cmp2 = icmp ult i32 %swap, %swap2
+ %ext1 = zext i1 %cmp1 to i32
+ %ext2 = zext i1 %cmp2 to i32
+ %res = sub i32 %7, %8
+ %conv = trunc i32 %res to i8
%arrayidx2 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
store i8 %conv, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -410,11 +410,11 @@ for.body: ; preds = %for.body, %for.body
%sum.05 = phi i32 [ 0, %for.body.preheader ], [ %spec.select, %for.body ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
%0 = load ptr, ptr %arrayidx, align 8
- %1 = load i32, ptr %0, align 1
- %2 = load i32, ptr %b, align 1
- %3 = icmp ne i32 %1, %2
- %4 = zext i1 %3 to i32
- %tobool.not = icmp eq i32 %4, 0
+ %bcmp_exp = load i32, ptr %0, align 1
+ %bcmp_exp2 = load i32, ptr %b, align 1
+ %cmp = icmp ne i32 %bcmp, %bcmp_exp2
+ %res = zext i1 %cmp to i32
+ %tobool.not = icmp eq i32 %res, 0
%add = zext i1 %tobool.not to i32
%spec.select = add nuw nsw i32 %sum.05, %add
tail call void @func()
@@ -459,20 +459,20 @@ for.body: ; preds = %for.body, %entry
%sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
%0 = load ptr, ptr %arrayidx, align 8
- %1 = load i32, ptr %0, align 1
- %2 = load i32, ptr %b, align 1
- %3 = xor i32 %1, %2
+ %bcmp_exp = load i32, ptr %0, align 1
+ %bcmp_exp2 = load i32, ptr %b, align 1
+ %xor1 = xor i32 %bcmp_exp, %bcmp_exp2
%4 = getelementptr i8, ptr %0, i64 4
%5 = getelementptr i8, ptr %b, i64 4
- %6 = load i16, ptr %4, align 1
- %7 = load i16, ptr %5, align 1
- %8 = zext i16 %6 to i32
- %9 = zext i16 %7 to i32
- %10 = xor i32 %8, %9
- %11 = or i32 %3, %10
- %12 = icmp ne i32 %11, 0
- %13 = zext i1 %12 to i32
- %tobool = icmp eq i32 %13, 0
+ %bcmp_exp3 = load i16, ptr %4, align 1
+ %bcmp_exp4 = load i16, ptr %5, align 1
+ %ext = zext i16 %6 to i32
+ %ext2 = zext i16 %7 to i32
+ %xor2 = xor i32 %ext, %ext2
+ %or = or i32 %xor1, %xor2
+ %cmp = icmp ne i32 %or, 0
+ %res = zext i1 %cmp to i32
+ %tobool = icmp eq i32 %res, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.05, %add
%inc = add nuw i64 %i.06, 1
>From 22dae428c22a01b39aecae943a18f646eabc9932 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Wed, 10 Jan 2024 11:13:36 -0300
Subject: [PATCH 04/11] Move pass to later in the pipeline!
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index e2dd413f12d696..391d35e968ae1c 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -388,8 +388,6 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
- FPM.addPass(MergeICmpsPass());
- FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -536,8 +534,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
- FPM.addPass(MergeICmpsPass());
- FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -1436,6 +1432,10 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// result too early.
OptimizePM.addPass(LoopSinkPass());
+ // Detect and convert memcmp-like idioms to the call, and expand when profitable
+ OptimizePM.addPass(MergeICmpsPass());
+ OptimizePM.addPass(ExpandMemCmpPass(TM));
+
// And finally clean up LCSSA form before generating code.
OptimizePM.addPass(InstSimplifyPass());
>From 63a015f24bd77c8fdb55e6d1afd5fb018eee2484 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Wed, 10 Jan 2024 18:10:17 +0000
Subject: [PATCH 05/11] Apply suggestions from code review
---
.../test/CodeGen/AArch64/dag-combine-setcc.ll | 4 +-
.../AArch64/machine-licm-hoist-load.ll | 16 +-
llvm/test/Other/new-pm-defaults.ll | 8 +-
.../Other/new-pm-thinlto-postlink-defaults.ll | 6 +-
.../new-pm-thinlto-postlink-pgo-defaults.ll | 6 +-
...-pm-thinlto-postlink-samplepgo-defaults.ll | 6 +-
.../Other/new-pm-thinlto-prelink-defaults.ll | 4 +-
.../new-pm-thinlto-prelink-pgo-defaults.ll | 26 +-
...w-pm-thinlto-prelink-samplepgo-defaults.ll | 4 +-
.../PhaseOrdering/X86/memcmp-early.ll | 41 +-
.../Transforms/PhaseOrdering/X86/memcmp.ll | 403 ++++++++++--------
11 files changed, 293 insertions(+), 231 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
index f58de2efb1c310..f22d3acb75026f 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -276,7 +276,7 @@ define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {
%xor2 = xor i64 %a.8, %b.8
%or = or i64 %xor1, %xor2
%cmp1 = icmp ne i64 %or, 0
- %ext = zext i1 %cmp to i32
+ %ext = zext i1 %cmp1 to i32
%cmp2 = icmp eq i32 %ext, 0
ret i1 %cmp2
}
@@ -301,7 +301,7 @@ define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) {
%or = or i64 %xor1, %xor2
%cmp = icmp ne i64 %or, 0
%ext = zext i1 %cmp to i32
- ret i1 %ext
+ ret i1 %cmp
}
; Doesn't increase the number of instructions, where the LHS has multiple uses
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index 0651027b7b9b5f..4c2188cf340e87 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -352,7 +352,7 @@ for.body: ; preds = %for.body, %for.body
%cmp2 = icmp ult i32 %swap, %swap2
%ext1 = zext i1 %cmp1 to i32
%ext2 = zext i1 %cmp2 to i32
- %res = sub i32 %7, %8
+ %res = sub i32 %ext1, %ext2
%conv = trunc i32 %res to i8
%arrayidx2 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
store i8 %conv, ptr %arrayidx2, align 1
@@ -412,7 +412,7 @@ for.body: ; preds = %for.body, %for.body
%0 = load ptr, ptr %arrayidx, align 8
%bcmp_exp = load i32, ptr %0, align 1
%bcmp_exp2 = load i32, ptr %b, align 1
- %cmp = icmp ne i32 %bcmp, %bcmp_exp2
+ %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2
%res = zext i1 %cmp to i32
%tobool.not = icmp eq i32 %res, 0
%add = zext i1 %tobool.not to i32
@@ -462,12 +462,12 @@ for.body: ; preds = %for.body, %entry
%bcmp_exp = load i32, ptr %0, align 1
%bcmp_exp2 = load i32, ptr %b, align 1
%xor1 = xor i32 %bcmp_exp, %bcmp_exp2
- %4 = getelementptr i8, ptr %0, i64 4
- %5 = getelementptr i8, ptr %b, i64 4
- %bcmp_exp3 = load i16, ptr %4, align 1
- %bcmp_exp4 = load i16, ptr %5, align 1
- %ext = zext i16 %6 to i32
- %ext2 = zext i16 %7 to i32
+ %gep0 = getelementptr i8, ptr %0, i64 4
+ %gepb = getelementptr i8, ptr %b, i64 4
+ %bcmp_exp3 = load i16, ptr %gep0, align 1
+ %bcmp_exp4 = load i16, ptr %gepb, align 1
+ %ext = zext i16 %bcmp_exp3 to i32
+ %ext2 = zext i16 %bcmp_exp4 to i32
%xor2 = xor i32 %ext, %ext2
%or = or i32 %xor1, %xor2
%cmp = icmp ne i32 %or, 0
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index ce13b2eb52a7ef..c5d70a6a0b2c33 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -142,12 +142,10 @@
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
@@ -269,6 +267,8 @@
; CHECK-O-NEXT: Running pass: LICMPass
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-O-NEXT: Running pass: LoopSinkPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-O-NEXT: Running pass: TailCallElimPass
@@ -315,4 +315,4 @@ loop:
br i1 %cmp, label %exit, label %loop
exit:
ret void
-}
+}
\ No newline at end of file
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index d6f09a85953c14..a31a1b069b0474 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -81,12 +81,10 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
@@ -197,6 +195,8 @@
; CHECK-POSTLINK-O-NEXT: Running pass: LICMPass
; CHECK-POSTLINK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass
+; CHECK-POSTLINK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-POSTLINK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-POSTLINK-O-NEXT: Running pass: TailCallElimPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index cc3939c5bdcf7b..03a77c0d5bc8b6 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -69,12 +69,10 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
@@ -182,6 +180,8 @@
; CHECK-O-NEXT: Running pass: LICMPass
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-O-NEXT: Running pass: LoopSinkPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-O-NEXT: Running pass: TailCallElimPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index bf354c91d15f37..b5c5f0f537e774 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -77,12 +77,10 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
@@ -190,6 +188,8 @@
; CHECK-O-NEXT: Running pass: LICMPass
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-O-NEXT: Running pass: LoopSinkPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-O-NEXT: Running pass: TailCallElimPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 9c5f9fd281ee7c..6486639e07b49c 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -112,12 +112,10 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index 92ab5b6bbc74ad..09f9f0f48baddb 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -102,23 +102,17 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running analysis: BasicAA on foo
-; CHECK-O-NEXT: Running analysis: AssumptionAnalysis on foo
-; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo
-; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo
-; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
-; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
-; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
-; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
-; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SROAPass
+; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
+; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: BasicAA
+; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
+; CHECK-O-NEXT: Running analysis: TypeBasedAA
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
@@ -126,6 +120,10 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
+; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
+; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
+; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
+; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index b565e80ac05e90..47bdbfd2d357d4 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -81,12 +81,10 @@
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass on foo
-; CHECK-O-NEXT: Running analysis: AAManager on foo
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass on foo
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
+; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll
index a62b17de08ee43..b4f7780444b25e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll
@@ -1,6 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -O2 -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; Examples of when moving memcmp expansion earlier in the pipeline is beneficial
+
@s1 = internal global ptr @.str, align 8
@s2 = internal global ptr @.str.1, align 8
@s3 = internal global ptr @.str.2, align 8
@@ -19,19 +22,35 @@ define dso_local i32 @memcmp_same_prefix_consts(ptr noundef %x) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 52
-; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END8:%.*]], label [[IF_THEN:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[IF_END8:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[TMP7:%.*]] = xor i32 [[TMP4]], 33
-; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP1]]
-; CHECK-NEXT: [[DOTNOT3:%.*]] = icmp eq i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[DOTNOT3]], label [[IF_END8]], label [[IF_THEN3:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP9]], 858927408
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP12]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], 33
+; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP10]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[TMP17]], 0
+; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[IF_END8]], label [[IF_THEN3:%.*]]
; CHECK: if.then3:
-; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP4]], 63
-; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP1]]
-; CHECK-NEXT: [[DOTNOT4:%.*]] = icmp eq i32 [[TMP10]], 0
-; CHECK-NEXT: br i1 [[DOTNOT4]], label [[IF_END8]], label [[RETURN:%.*]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[X]], align 1
+; CHECK-NEXT: [[TMP19:%.*]] = xor i32 [[TMP18]], 858927408
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = zext i8 [[TMP21]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], 63
+; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP19]], [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
+; CHECK-NEXT: [[TMP26:%.*]] = zext i1 [[TMP25]] to i32
+; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[TMP26]], 0
+; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[IF_END8]], label [[RETURN:%.*]]
; CHECK: if.end8:
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
index 68dfacac5b5e12..de90aec1a49c78 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
@@ -42,14 +42,14 @@ define i1 @length0_lt(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i32 @length2(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length2(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
-; CHECK-NEXT: [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
; CHECK-NEXT: ret i32 [[TMP7]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
@@ -59,11 +59,13 @@ define i32 @length2(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length2_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length2_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
%c = icmp eq i32 %m, 0
@@ -73,12 +75,15 @@ define i1 @length2_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length2_lt(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length2_lt(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i16 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
@@ -89,12 +94,15 @@ define i1 @length2_lt(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length2_gt(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length2_gt(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i16 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0
@@ -105,7 +113,7 @@ define i1 @length2_gt(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length2_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length2_eq_const(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
; CHECK-NEXT: ret i1 [[TMP2]]
@@ -118,7 +126,7 @@ define i1 @length2_eq_const(ptr %X) #0 {
; Function Attrs: nounwind
define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
; CHECK-NEXT: ret i1 [[C]]
@@ -131,16 +139,16 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i32 @length3(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length3(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: loadbb:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP2]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; CHECK: res_block:
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i16 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i16 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i16 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
; CHECK: loadbb1:
@@ -150,7 +158,7 @@ define i32 @length3(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = sub nsw i32 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]]
; CHECK-NEXT: br label [[ENDBLOCK]]
; CHECK: endblock:
; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
@@ -163,19 +171,20 @@ define i32 @length3(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length3_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length3_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i16
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP10]], 0
-; CHECK-NEXT: ret i1 [[TMP11]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[TMP12]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) #0
%c = icmp ne i32 %m, 0
@@ -185,7 +194,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i32 @length4(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length4(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
@@ -193,9 +202,9 @@ define i32 @length4(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
-; CHECK-NEXT: [[DOTNEG:%.*]] = sext i1 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[DOTNEG]], [[TMP7]]
-; CHECK-NEXT: ret i32 [[TMP8]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i32 [[TMP9]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0
ret i32 %m
@@ -204,7 +213,7 @@ define i32 @length4(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length4_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length4_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
@@ -218,7 +227,7 @@ define i1 @length4_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length4_lt(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length4_lt(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
@@ -234,7 +243,7 @@ define i1 @length4_lt(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length4_gt(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length4_gt(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
@@ -250,10 +259,12 @@ define i1 @length4_gt(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length4_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length4_eq_const(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 875770417
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #0
%c = icmp eq i32 %m, 0
@@ -263,16 +274,16 @@ define i1 @length4_eq_const(ptr %X) #0 {
; Function Attrs: nounwind
define i32 @length5(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length5(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: loadbb:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP2]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; CHECK: res_block:
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
; CHECK: loadbb1:
@@ -282,7 +293,7 @@ define i32 @length5(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = sub nsw i32 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]]
; CHECK-NEXT: br label [[ENDBLOCK]]
; CHECK: endblock:
; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
@@ -295,19 +306,20 @@ define i32 @length5(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length5_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length5_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
-; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
-; CHECK-NEXT: ret i1 [[TMP11]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[TMP12]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0
%c = icmp ne i32 %m, 0
@@ -317,27 +329,31 @@ define i1 @length5_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length5_lt(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length5_lt(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: loadbb:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP2]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; CHECK: res_block:
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; CHECK: res_block:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1
; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
; CHECK: loadbb1:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 4
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i8 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]]
; CHECK-NEXT: br label [[ENDBLOCK]]
; CHECK: endblock:
-; CHECK-NEXT: [[PHI_RES:%.*]] = phi i1 [ [[TMP10]], [[LOADBB1]] ], [ [[TMP5]], [[RES_BLOCK]] ]
-; CHECK-NEXT: ret i1 [[PHI_RES]]
+; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0
%c = icmp slt i32 %m, 0
@@ -347,17 +363,18 @@ define i1 @length5_lt(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length7_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length7_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: ret i1 [[TMP9]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT: ret i1 [[TMP10]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) #0
%c = icmp ne i32 %m, 0
@@ -367,7 +384,7 @@ define i1 @length7_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i32 @length8(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length8(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]])
@@ -375,9 +392,9 @@ define i32 @length8(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
-; CHECK-NEXT: [[DOTNEG:%.*]] = sext i1 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[DOTNEG]], [[TMP7]]
-; CHECK-NEXT: ret i32 [[TMP8]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: ret i32 [[TMP9]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) #0
ret i32 %m
@@ -386,11 +403,13 @@ define i32 @length8(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length8_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length8_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) #0
%c = icmp eq i32 %m, 0
@@ -400,7 +419,7 @@ define i1 @length8_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length8_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length8_eq_const(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
; CHECK-NEXT: ret i1 [[TMP2]]
@@ -413,19 +432,22 @@ define i1 @length8_eq_const(ptr %X) #0 {
; Function Attrs: nounwind
define i1 @length9_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length9_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i64
-; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP10]], 0
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) #0
%c = icmp eq i32 %m, 0
@@ -435,19 +457,22 @@ define i1 @length9_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length10_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length10_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = xor i16 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i64
-; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP10]], 0
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) #0
%c = icmp eq i32 %m, 0
@@ -457,17 +482,20 @@ define i1 @length10_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length11_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length11_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: ret i1 [[DOTNOT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) #0
%c = icmp eq i32 %m, 0
@@ -477,19 +505,20 @@ define i1 @length11_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length12_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length12_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP10]], 0
-; CHECK-NEXT: ret i1 [[TMP11]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[TMP12]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) #0
%c = icmp ne i32 %m, 0
@@ -499,13 +528,13 @@ define i1 @length12_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i32 @length12(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length12(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: loadbb:
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; CHECK: res_block:
; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP2]], [[LOADBB:%.*]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
@@ -522,7 +551,7 @@ define i32 @length12(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP12:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP10]])
; CHECK-NEXT: [[TMP13]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP13]], [[TMP14]]
; CHECK-NEXT: br i1 [[TMP15]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; CHECK: endblock:
; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
@@ -535,17 +564,20 @@ define i32 @length12(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length13_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length13_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: ret i1 [[DOTNOT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) #0
%c = icmp eq i32 %m, 0
@@ -555,17 +587,20 @@ define i1 @length13_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length14_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length14_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: ret i1 [[DOTNOT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) #0
%c = icmp eq i32 %m, 0
@@ -575,17 +610,20 @@ define i1 @length14_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length15_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @length15_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: ret i1 [[DOTNOT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) #0
%c = icmp eq i32 %m, 0
@@ -595,13 +633,13 @@ define i1 @length15_eq(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i32 @length16(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length16(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: loadbb:
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; CHECK: res_block:
; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP2]], [[LOADBB:%.*]] ], [ [[TMP11:%.*]], [[LOADBB1]] ]
@@ -616,7 +654,7 @@ define i32 @length16(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP11]] = tail call i64 @llvm.bswap.i64(i64 [[TMP9]])
; CHECK-NEXT: [[TMP12]] = tail call i64 @llvm.bswap.i64(i64 [[TMP10]])
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]]
; CHECK-NEXT: br i1 [[TMP13]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; CHECK: endblock:
; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ]
@@ -629,7 +667,7 @@ define i32 @length16(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length16_eq(ptr %x, ptr %y) #0 {
; CHECK-LABEL: define i1 @length16_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
@@ -643,10 +681,12 @@ define i1 @length16_eq(ptr %x, ptr %y) #0 {
; Function Attrs: nounwind
define i1 @length16_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length16_eq_const(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i128 [[TMP1]], 70720121592765328381466889075544961328
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) #0
%c = icmp eq i32 %m, 0
@@ -657,7 +697,7 @@ define i1 @length16_eq_const(ptr %X) #0 {
define i32 @length24(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length24(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(24) [[X]], ptr noundef nonnull dereferenceable(24) [[Y]], i64 24) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(24) [[X]], ptr noundef nonnull dereferenceable(24) [[Y]], i64 24) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: ret i32 [[M]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) #0
@@ -667,19 +707,22 @@ define i32 @length24(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length24_eq(ptr %x, ptr %y) #0 {
; CHECK-LABEL: define i1 @length24_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext i64 [[TMP8]] to i128
-; CHECK-NEXT: [[TMP10:%.*]] = or i128 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i128 [[TMP10]], 0
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128
+; CHECK-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128
+; CHECK-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) #0
%cmp = icmp eq i32 %call, 0
@@ -689,13 +732,13 @@ define i1 @length24_eq(ptr %x, ptr %y) #0 {
; Function Attrs: nounwind
define i1 @length24_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length24_eq_const(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3689065127958034230
-; CHECK-NEXT: [[TMP6:%.*]] = zext i64 [[TMP5]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
; CHECK-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT: ret i1 [[TMP8]]
@@ -709,7 +752,7 @@ define i1 @length24_eq_const(ptr %X) #0 {
define i32 @length32(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length32(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(32) [[X]], ptr noundef nonnull dereferenceable(32) [[Y]], i64 32) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(32) [[X]], ptr noundef nonnull dereferenceable(32) [[Y]], i64 32) #[[ATTR2]]
; CHECK-NEXT: ret i32 [[M]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) #0
@@ -719,17 +762,20 @@ define i32 @length32(ptr %X, ptr %Y) #0 {
; Function Attrs: nounwind
define i1 @length32_eq(ptr %x, ptr %y) #0 {
; CHECK-LABEL: define i1 @length32_eq(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Y]], i64 16
-; CHECK-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16
; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i128 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i128 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[DOTNOT2:%.*]] = and i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: ret i1 [[DOTNOT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) #0
%cmp = icmp eq i32 %call, 0
@@ -739,14 +785,15 @@ define i1 @length32_eq(ptr %x, ptr %y) #0 {
; Function Attrs: nounwind
define i1 @length32_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length32_eq_const(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[X]], i64 16
-; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP2]], align 1
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], 65382562593882267225249597816672106294
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: ret i1 [[TMP6]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16
+; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
+; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: ret i1 [[TMP7]]
;
%m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) #0
%c = icmp ne i32 %m, 0
@@ -757,7 +804,7 @@ define i1 @length32_eq_const(ptr %X) #0 {
define i32 @length64(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @length64(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR2]]
; CHECK-NEXT: ret i32 [[M]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) #0
@@ -768,7 +815,7 @@ define i32 @length64(ptr %X, ptr %Y) #0 {
define i1 @length64_eq(ptr %x, ptr %y) #0 {
; CHECK-LABEL: define i1 @length64_eq(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR5]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR2]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -781,7 +828,7 @@ define i1 @length64_eq(ptr %x, ptr %y) #0 {
define i1 @length64_eq_const(ptr %X) #0 {
; CHECK-LABEL: define i1 @length64_eq_const(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) @.str, i64 64) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) @.str, i64 64) #[[ATTR2]]
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
; CHECK-NEXT: ret i1 [[C]]
;
@@ -794,7 +841,7 @@ define i1 @length64_eq_const(ptr %X) #0 {
define i32 @huge_length(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i32 @huge_length(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR2]]
; CHECK-NEXT: ret i32 [[M]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) #0
@@ -805,7 +852,7 @@ define i32 @huge_length(ptr %X, ptr %Y) #0 {
define i1 @huge_length_eq(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @huge_length_eq(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR2]]
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
; CHECK-NEXT: ret i1 [[C]]
;
@@ -818,7 +865,7 @@ define i1 @huge_length_eq(ptr %X, ptr %Y) #0 {
define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) #0 {
; CHECK-LABEL: define i32 @nonconst_length(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR2]]
; CHECK-NEXT: ret i32 [[M]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) #0
@@ -829,7 +876,7 @@ define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) #0 {
define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) #0 {
; CHECK-LABEL: define i1 @nonconst_length_eq(
; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]]
+; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR2]]
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0
; CHECK-NEXT: ret i1 [[C]]
;
@@ -841,11 +888,13 @@ define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) #0 {
; Function Attrs: nounwind
define i1 @bcmp_length2(ptr %X, ptr %Y) #0 {
; CHECK-LABEL: define i1 @bcmp_length2(
-; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: ret i1 [[DOTNOT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[C]]
;
%m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) #0
%c = icmp eq i32 %m, 0
>From e1e57398eb1d790237c5ca22c5b8493e4a8e2849 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Wed, 10 Jan 2024 18:18:56 +0000
Subject: [PATCH 06/11] Whitespace
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 3 ++-
llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp | 5 +++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 391d35e968ae1c..2955cc7c68ca58 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1432,7 +1432,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// result too early.
OptimizePM.addPass(LoopSinkPass());
- // Detect anc convert memcpm like idioms to the call, and expand when profitable
+ // Detect anc convert memcpm like idioms to the call, and expand when
+ // profitable
OptimizePM.addPass(MergeICmpsPass());
OptimizePM.addPass(ExpandMemCmpPass(TM));
diff --git a/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index ed3843de422f00..a8577b5229ed7c 100644
--- a/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -921,8 +921,9 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
DominatorTree *DT) {
// Sanitizers prefer that calls to memcmp remain as such
- //so that they may be itercepted, but since the sanitizer passes run late
- // we disable the optimization here. See maybeMarkSanitizerLibraryCallNoBuiltin
+ // so that they may be intercepted, but since the sanitizer passes run late
+ // we disable the optimization here. See
+ // maybeMarkSanitizerLibraryCallNoBuiltin
if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
>From ba9373d16e956b463780b57b2bca0a26ac1355af Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Tue, 16 Jan 2024 11:09:23 -0300
Subject: [PATCH 07/11] Small Fixes
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 14 +++++++++-----
llvm/test/Other/new-pm-defaults.ll | 6 +++---
.../test/Other/new-pm-thinlto-postlink-defaults.ll | 4 ++--
.../Other/new-pm-thinlto-postlink-pgo-defaults.ll | 4 ++--
.../new-pm-thinlto-postlink-samplepgo-defaults.ll | 4 ++--
.../Transforms/PhaseOrdering/PowerPC/lit.local.cfg | 2 +-
6 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 2955cc7c68ca58..fec23d20cbb68c 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1432,19 +1432,19 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// result too early.
OptimizePM.addPass(LoopSinkPass());
- // Detect anc convert memcpm like idioms to the call, and expand when
- // profitable
- OptimizePM.addPass(MergeICmpsPass());
- OptimizePM.addPass(ExpandMemCmpPass(TM));
-
// And finally clean up LCSSA form before generating code.
OptimizePM.addPass(InstSimplifyPass());
+
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
// flattening of blocks.
OptimizePM.addPass(DivRemPairsPass());
+ // Detect and convert memcmp like idioms to the call then expand them if profitable
+ OptimizePM.addPass(MergeICmpsPass());
+ OptimizePM.addPass(ExpandMemCmpPass(TM));
+
// Try to annotate calls that were created during optimization.
OptimizePM.addPass(TailCallElimPass());
@@ -1963,6 +1963,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// flattening of blocks.
LateFPM.addPass(DivRemPairsPass());
+ // Detect and convert memcmp like idioms to the call then expand them if profitable
+ OptimizePM.addPass(MergeICmpsPass());
+ OptimizePM.addPass(ExpandMemCmpPass(TM));
+
// Delete basic blocks, which optimization passes may have killed.
LateFPM.addPass(SimplifyCFGPass(
SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index c5d70a6a0b2c33..26c8ce5fe9e5a4 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -267,10 +267,10 @@
; CHECK-O-NEXT: Running pass: LICMPass
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-O-NEXT: Running pass: LoopSinkPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: TailCallElimPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpModulePass
@@ -315,4 +315,4 @@ loop:
br i1 %cmp, label %exit, label %loop
exit:
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index a31a1b069b0474..be4bd7dd226842 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -195,10 +195,10 @@
; CHECK-POSTLINK-O-NEXT: Running pass: LICMPass
; CHECK-POSTLINK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass
-; CHECK-POSTLINK-O-NEXT: Running pass: MergeICmpsPass
-; CHECK-POSTLINK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass
+; CHECK-POSTLINK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-POSTLINK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-POSTLINK-O-NEXT: Running pass: TailCallElimPass
; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-POST-EP-OPT-LAST-NEXT: Running pass: NoOpModulePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 03a77c0d5bc8b6..a77013809ccf0e 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -180,10 +180,10 @@
; CHECK-O-NEXT: Running pass: LICMPass
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-O-NEXT: Running pass: LoopSinkPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: TailCallElimPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index b5c5f0f537e774..b2c6464108d4d5 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -188,10 +188,10 @@
; CHECK-O-NEXT: Running pass: LICMPass
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
; CHECK-O-NEXT: Running pass: LoopSinkPass
-; CHECK-O-NEXT: Running pass: MergeICmpsPass
-; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
+; CHECK-O-NEXT: Running pass: MergeICmpsPass
+; CHECK-O-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O-NEXT: Running pass: TailCallElimPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
diff --git a/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg b/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg
index dfb347e640e144..091332439b1867 100644
--- a/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg
+++ b/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg
@@ -1,2 +1,2 @@
if not 'PowerPC' in config.root.targets:
- config.unsupported = True
\ No newline at end of file
+ config.unsupported = True
>From 8d670ef777478e26b0c42b7ce7d12bf4eea7b457 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Tue, 16 Jan 2024 11:33:22 -0300
Subject: [PATCH 08/11] Remove unused TM member
---
llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h | 3 +--
llvm/lib/Passes/PassBuilderPipelines.cpp | 6 +++---
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
index 94ba0cf9305040..3b5d3cab0d80ef 100644
--- a/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
+++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
@@ -16,10 +16,9 @@ namespace llvm {
class TargetMachine;
class ExpandMemCmpPass : public PassInfoMixin<ExpandMemCmpPass> {
- const TargetMachine *TM;
public:
- explicit ExpandMemCmpPass(const TargetMachine *TM_) : TM(TM_) {}
+ explicit ExpandMemCmpPass() {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
};
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index fec23d20cbb68c..ddede72f721556 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1443,8 +1443,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// Detect and convert memcmp like idioms to the call then expand them if profitable
OptimizePM.addPass(MergeICmpsPass());
- OptimizePM.addPass(ExpandMemCmpPass(TM));
-
+ OptimizePM.addPass(ExpandMemCmpPass());
+
// Try to annotate calls that were created during optimization.
OptimizePM.addPass(TailCallElimPass());
@@ -1965,7 +1965,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Detect and convert memcmp like idioms to the call then expand them if profitable
OptimizePM.addPass(MergeICmpsPass());
- OptimizePM.addPass(ExpandMemCmpPass(TM));
+ OptimizePM.addPass(ExpandMemCmpPass());
// Delete basic blocks, which optimization passes may have killed.
LateFPM.addPass(SimplifyCFGPass(
>From 1a340058285eda1f8c4498bd70bb81ed181eb79f Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Tue, 16 Jan 2024 11:54:19 -0300
Subject: [PATCH 09/11] Actually remove the TM from everywhere
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 4 ++--
llvm/lib/Passes/PassRegistry.def | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index ddede72f721556..7f1ded591d6a29 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1964,8 +1964,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
LateFPM.addPass(DivRemPairsPass());
// Detect and convert memcmp like idioms to the call then expand them if profitable
- OptimizePM.addPass(MergeICmpsPass());
- OptimizePM.addPass(ExpandMemCmpPass());
+ LateFPM.addPass(MergeICmpsPass());
+ LateFPM.addPass(ExpandMemCmpPass());
// Delete basic blocks, which optimization passes may have killed.
LateFPM.addPass(SimplifyCFGPass(
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 3acb72912709f1..efb361518dc2e6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -314,7 +314,6 @@ FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM))
FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM))
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM))
-FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("flattencfg", FlattenCFGPass())
FUNCTION_PASS("float2int", Float2IntPass())
@@ -359,7 +358,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("memprof", MemProfilerPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
-FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
+FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass())
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
>From 499ffeb175adf4fce2cd7a5b33a37ca171b1f3cb Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Wed, 17 Jan 2024 17:45:42 +0000
Subject: [PATCH 10/11] Update tests that had a slight change
---
llvm/test/Other/new-pm-lto-defaults.ll | 2 ++
llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll | 10 ++++++++++
2 files changed, 12 insertions(+)
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index d451d2897f673c..1691e186fb862e 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -141,6 +141,8 @@
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O23SZ-NEXT: Running pass: LoopSink
; CHECK-O23SZ-NEXT: Running pass: DivRemPairs
+; CHECK-O23SZ-NEXT: Running pass: MergeICmpsPass
+; CHECK-O23SZ-NEXT: Running pass: ExpandMemCmpPass
; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass
; CHECK-O23SZ-NEXT: Running pass: EliminateAvailableExternallyPass
; CHECK-O23SZ-NEXT: Running pass: GlobalDCEPass
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
index de90aec1a49c78..a9dbf5cf4b58e6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll
@@ -116,6 +116,7 @@ define i1 @length2_eq_const(ptr %X) #0 {
; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i1 [[TMP2]]
;
%m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #0
@@ -184,6 +185,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
; CHECK-NEXT: ret i1 [[TMP12]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) #0
@@ -217,6 +219,7 @@ define i1 @length4_eq(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
; CHECK-NEXT: ret i1 [[TMP3]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0
@@ -319,6 +322,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
; CHECK-NEXT: ret i1 [[TMP12]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0
@@ -374,6 +378,7 @@ define i1 @length7_eq(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
; CHECK-NEXT: ret i1 [[TMP10]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) #0
@@ -422,6 +427,7 @@ define i1 @length8_eq_const(ptr %X) #0 {
; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i1 [[TMP2]]
;
%m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) #0
@@ -518,6 +524,7 @@ define i1 @length12_eq(ptr %X, ptr %Y) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
; CHECK-NEXT: ret i1 [[TMP12]]
;
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) #0
@@ -671,6 +678,7 @@ define i1 @length16_eq(ptr %x, ptr %y) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
; CHECK-NEXT: ret i1 [[TMP3]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) #0
@@ -741,6 +749,7 @@ define i1 @length24_eq_const(ptr %X) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230
; CHECK-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
; CHECK-NEXT: ret i1 [[TMP8]]
;
%m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) #0
@@ -793,6 +802,7 @@ define i1 @length32_eq_const(ptr %X) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294
; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
; CHECK-NEXT: ret i1 [[TMP7]]
;
%m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) #0
>From 150e6b0093aa368475cd077958e4d85a3b811a35 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi <baraldigabriel at gmail.com>
Date: Wed, 17 Jan 2024 17:58:01 +0000
Subject: [PATCH 11/11] formatting change
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 7f1ded591d6a29..a129160671da51 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1435,13 +1435,13 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// And finally clean up LCSSA form before generating code.
OptimizePM.addPass(InstSimplifyPass());
-
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
// flattening of blocks.
OptimizePM.addPass(DivRemPairsPass());
- // Detect and convert memcmp like idioms to the call then expand them if profitable
+ // Detect and convert memcmp like idioms to the call then expand them if
+ // profitable
OptimizePM.addPass(MergeICmpsPass());
OptimizePM.addPass(ExpandMemCmpPass());
@@ -1963,7 +1963,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// flattening of blocks.
LateFPM.addPass(DivRemPairsPass());
- // Detect and convert memcmp like idioms to the call then expand them if profitable
+ // Detect and convert memcmp like idioms to the call then expand them if
+ // profitable
LateFPM.addPass(MergeICmpsPass());
LateFPM.addPass(ExpandMemCmpPass());
More information about the cfe-commits
mailing list